Beispiel #1
0
    def apply(self, **kwargs: Any) -> None:
        source = self.document["source"]
        msgstr = ""

        # XXX check if this is reliable
        assert source.startswith(self.env.srcdir)
        docname = path.splitext(
            relative_path(path.join(self.env.srcdir, "dummy"), source))[0]
        textdomain = docname_to_domain(docname, self.config.gettext_compact)

        # fetch translations
        dirs = [
            path.join(self.env.srcdir, directory)
            for directory in self.config.locale_dirs
        ]
        # sphinx.locale.init changes its args type by version
        # so, ignore mypy check for the following call
        catalog, has_catalog = init_locale(
            dirs,
            self.config.language,
            textdomain  # type: ignore
        )
        if not has_catalog:
            return

        # for translated text keep original as ORIGINAL_TEXT_ATTR attribute
        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            if not msgstr or msgstr == msg or not msgstr.strip():
                # as-of-yet untranslated
                continue
            # self.logger.info("add %s attr to node." % ORIGINAL_TEXT_ATTR)
            node[ORIGINAL_TEXT_ATTR] = msg
Beispiel #2
0
def test_extract_messages_without_rawsource():
    """
    Check node.rawsource is fall-backed by using node.astext() value.

    `extract_message` which is used from Sphinx i18n feature drop ``not node.rawsource``
    nodes. So, all nodes which want to translate must have ``rawsource`` value.
    However, sometimes node.rawsource is not set.

    For example: recommonmark-0.2.0 doesn't set rawsource to `paragraph` node.

    refs #1994: Fall back to node's astext() during i18n message extraction.
    """
    p = nodes.paragraph()
    p.append(nodes.Text('test'))
    p.append(nodes.Text('sentence'))
    assert not p.rawsource  # target node must not have rawsource value
    document = create_new_document()
    document.append(p)
    _transform(document)
    assert_node_count(extract_messages(document), nodes.TextElement, 1)
    assert [m for n, m in extract_messages(document)][0], 'text sentence'
Beispiel #3
0
def test_extract_messages_without_rawsource():
    """
    Check node.rawsource is fall-backed by using node.astext() value.

    `extract_message` which is used from Sphinx i18n feature drop ``not node.rawsource``
    nodes. So, all nodes which want to translate must have ``rawsource`` value.
    However, sometimes node.rawsource is not set.

    For example: recommonmark-0.2.0 doesn't set rawsource to `paragraph` node.

    refs #1994: Fall back to node's astext() during i18n message extraction.
    """
    p = nodes.paragraph()
    p.append(nodes.Text('test'))
    p.append(nodes.Text('sentence'))
    assert not p.rawsource  # target node must not have rawsource value
    document = create_new_document()
    document.append(p)
    _transform(document)
    assert_node_count(extract_messages(document), nodes.TextElement, 1)
    assert [m for n, m in extract_messages(document)][0], 'text sentence'
Beispiel #4
0
    def write_doc(self, docname: str, doctree: nodes.document) -> None:
        catalog = self.catalogs[docname_to_domain(docname, self.config.gettext_compact)]

        for toctree in self.env.tocs[docname].traverse(addnodes.toctree):
            for node, msg in extract_messages(toctree):
                node.uid = ''  # type: ignore  # Hack UUID model
                catalog.add(msg, node)

        for node, msg in extract_messages(doctree):
            catalog.add(msg, node)

        if 'index' in self.env.config.gettext_additional_targets:
            # Extract translatable messages from index entries.
            for node, entries in traverse_translatable_index(doctree):
                for typ, msg, tid, main, key_ in entries:
                    for m in split_index_msg(typ, msg):
                        if typ == 'pair' and m in pairindextypes.values():
                            # avoid built-in translated message was incorporated
                            # in 'sphinx.util.nodes.process_index_entry'
                            continue
                        catalog.add(m, node)
Beispiel #5
0
    def write_doc(self, docname, doctree):
        # type: (str, nodes.document) -> None
        catalog = self.catalogs[find_catalog(docname, self.config.gettext_compact)]

        for toctree in self.env.tocs[docname].traverse(addnodes.toctree):
            for node, msg in extract_messages(toctree):
                node.uid = ''  # type: ignore  # Hack UUID model
                catalog.add(msg, node)

        for node, msg in extract_messages(doctree):
            catalog.add(msg, node)

        if 'index' in self.env.config.gettext_additional_targets:
            # Extract translatable messages from index entries.
            for node, entries in traverse_translatable_index(doctree):
                for typ, msg, tid, main, key_ in entries:
                    for m in split_index_msg(typ, msg):
                        if typ == 'pair' and m in pairindextypes.values():
                            # avoid built-in translated message was incorporated
                            # in 'sphinx.util.nodes.process_index_entry'
                            continue
                        catalog.add(m, node)
Beispiel #6
0
    def write_doc(self, docname, doctree):
        catalog = self.catalogs[find_catalog(docname, self.config.gettext_compact)]

        for node, msg in extract_messages(doctree):
            catalog.add(msg, node)

        if "index" in self.env.config.gettext_additional_targets:
            # Extract translatable messages from index entries.
            for node, entries in traverse_translatable_index(doctree):
                for typ, msg, tid, main in entries:
                    for m in split_index_msg(typ, msg):
                        if typ == "pair" and m in pairindextypes.values():
                            # avoid built-in translated message was incorporated
                            # in 'sphinx.util.nodes.process_index_entry'
                            continue
                        catalog.add(m, node)
Beispiel #7
0
    def write_doc(self, docname, doctree):
        catalog = self.catalogs[find_catalog(docname,
                                             self.config.gettext_compact)]

        for node, msg in extract_messages(doctree):
            catalog.add(msg, node)

        # Extract translatable messages from index entries.
        for node, entries in traverse_translatable_index(doctree):
            for typ, msg, tid, main in entries:
                for m in split_index_msg(typ, msg):
                    if typ == 'pair' and m in pairindextypes.values():
                        # avoid built-in translated message was incorporated
                        # in 'sphinx.util.nodes.process_index_entry'
                        continue
                    catalog.add(m, node)
Beispiel #8
0
def test_extract_messages():
    text = dedent(
        """
        .. admonition:: admonition title

           admonition body
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.title, 1,
    )

    text = dedent(
        """
        .. figure:: foo.jpg

           this is title
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.caption, 1,
    )

    text = dedent(
        """
        .. rubric:: spam
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.rubric, 1,
    )

    text = dedent(
        """
        | spam
        | egg
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.line, 2,
    )

    text = dedent(
        """
        section
        =======

        +----------------+
        | | **Title 1**  |
        | | Message 1    |
        +----------------+
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.line, 2,
    )

    text = dedent(
        """
        * | **Title 1**
          | Message 1
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.line, 2,
    )
Beispiel #9
0
def test_extract_messages():
    text = dedent(
        """
        .. admonition:: admonition title

           admonition body
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.title, 1,
    )

    text = dedent(
        """
        .. figure:: foo.jpg

           this is title
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.caption, 1,
    )

    text = dedent(
        """
        .. rubric:: spam
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.rubric, 1,
    )


    text = dedent(
        """
        | spam
        | egg
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.line, 2,
    )


    text = dedent(
        """
        section
        =======

        +----------------+
        | | **Title 1**  |
        | | Message 1    |
        +----------------+
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.line, 2,
    )


    text = dedent(
        """
        * | **Title 1**
          | Message 1
        """
    )
    yield (
        assert_node_count,
        extract_messages(_get_doctree(text)),
        nodes.line, 2,
    )
Beispiel #10
0
    def write_doc(self, docname, doctree):
        catalog = self.catalogs[find_catalog(docname,
                                             self.config.gettext_compact)]

        for node, msg in extract_messages(doctree):
            catalog.add(msg, node)
Beispiel #11
0
    def apply(self, **kwargs):
        # type: (Any) -> None
        settings, source = self.document.settings, self.document['source']
        msgstr = u''

        # XXX check if this is reliable
        assert source.startswith(self.env.srcdir)
        docname = path.splitext(relative_path(path.join(self.env.srcdir, 'dummy'),
                                              source))[0]
        textdomain = find_catalog(docname, self.config.gettext_compact)

        # fetch translations
        dirs = [path.join(self.env.srcdir, directory)
                for directory in self.config.locale_dirs]
        catalog, has_catalog = init_locale(dirs, self.config.language, textdomain)
        if not has_catalog:
            return

        # phase1: replace reference ids with translated names
        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg or not msgstr.strip():
                # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            patch = publish_msgstr(self.app, msgstr, source,
                                   node.line, self.config, settings)
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue  # skip for now

            processed = False  # skip flag

            # update title(section) target name-id mapping
            if isinstance(node, nodes.title):
                section_node = node.parent
                new_name = nodes.fully_normalize_name(patch.astext())
                old_name = nodes.fully_normalize_name(node.astext())

                if old_name != new_name:
                    # if name would be changed, replace node names and
                    # document nameids mapping with new name.
                    names = section_node.setdefault('names', [])
                    names.append(new_name)
                    # Original section name (reference target name) should be kept to refer
                    # from other nodes which is still not translated or uses explicit target
                    # name like "`text to display <explicit target name_>`_"..
                    # So, `old_name` is still exist in `names`.

                    _id = self.document.nameids.get(old_name, None)
                    explicit = self.document.nametypes.get(old_name, None)

                    # * if explicit: _id is label. title node need another id.
                    # * if not explicit:
                    #
                    #   * if _id is None:
                    #
                    #     _id is None means:
                    #
                    #     1. _id was not provided yet.
                    #
                    #     2. _id was duplicated.
                    #
                    #        old_name entry still exists in nameids and
                    #        nametypes for another duplicated entry.
                    #
                    #   * if _id is provided: bellow process
                    if _id:
                        if not explicit:
                            # _id was not duplicated.
                            # remove old_name entry from document ids database
                            # to reuse original _id.
                            self.document.nameids.pop(old_name, None)
                            self.document.nametypes.pop(old_name, None)
                            self.document.ids.pop(_id, None)

                        # re-entry with new named section node.
                        #
                        # Note: msgnode that is a second parameter of the
                        # `note_implicit_target` is not necessary here because
                        # section_node has been noted previously on rst parsing by
                        # `docutils.parsers.rst.states.RSTState.new_subsection()`
                        # and already has `system_message` if needed.
                        self.document.note_implicit_target(section_node)

                    # replace target's refname to new target name
                    matcher = NodeMatcher(nodes.target, refname=old_name)
                    for old_target in self.document.traverse(matcher):  # type: nodes.target
                        old_target['refname'] = new_name

                    processed = True

            # glossary terms update refid
            if isinstance(node, nodes.term):
                gloss_entries = self.env.temp_data.setdefault('gloss_entries', set())
                for _id in node['names']:
                    if _id in gloss_entries:
                        gloss_entries.remove(_id)

                    parts = split_term_classifiers(msgstr)
                    patch = publish_msgstr(self.app, parts[0], source,
                                           node.line, self.config, settings)
                    patch = make_glossary_term(self.env, patch, parts[1],
                                               source, node.line, _id)
                    node['ids'] = patch['ids']
                    node['names'] = patch['names']
                    processed = True

            # update leaves with processed nodes
            if processed:
                for child in patch.children:
                    child.parent = node
                node.children = patch.children
                node['translated'] = True  # to avoid double translation

        # phase2: translation
        for node, msg in extract_messages(self.document):
            if node.get('translated', False):  # to avoid double translation
                continue  # skip if the node is already translated by phase1

            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg:  # as-of-yet untranslated
                continue

            # update translatable nodes
            if isinstance(node, addnodes.translatable):
                node.apply_translated_message(msg, msgstr)
                continue

            # update meta nodes
            if isinstance(node, nodes.pending) and is_pending_meta(node):
                node.details['nodes'][0]['content'] = msgstr
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            # Structural Subelements phase1
            # There is a possibility that only the title node is created.
            # see: http://docutils.sourceforge.net/docs/ref/doctree.html#structural-subelements
            if isinstance(node, nodes.title):
                # This generates: <section ...><title>msgstr</title></section>
                msgstr = msgstr + '\n' + '-' * len(msgstr) * 2

            patch = publish_msgstr(self.app, msgstr, source,
                                   node.line, self.config, settings)

            # Structural Subelements phase2
            if isinstance(node, nodes.title):
                # get <title> node that placed as a first child
                patch = patch.next_node()

            # ignore unexpected markups in translation message
            unexpected = (
                nodes.paragraph,    # expected form of translation
                nodes.title         # generated by above "Subelements phase2"
            )  # type: Tuple[Type[nodes.Element], ...]

            # following types are expected if
            # config.gettext_additional_targets is configured
            unexpected += LITERAL_TYPE_NODES
            unexpected += IMAGE_TYPE_NODES

            if not isinstance(patch, unexpected):
                continue  # skip

            # auto-numbered foot note reference should use original 'ids'.
            def list_replace_or_append(lst, old, new):
                # type: (List[N], N, N) -> None
                if old in lst:
                    lst[lst.index(old)] = new
                else:
                    lst.append(new)

            is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any)
            old_foot_refs = node.traverse(is_autofootnote_ref)  # type: List[nodes.footnote_reference]  # NOQA
            new_foot_refs = patch.traverse(is_autofootnote_ref)  # type: List[nodes.footnote_reference]  # NOQA
            if len(old_foot_refs) != len(new_foot_refs):
                old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
                new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
                logger.warning(__('inconsistent footnote references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_foot_ref_rawsources, new_foot_ref_rawsources),
                               location=node)
            old_foot_namerefs = {}  # type: Dict[str, List[nodes.footnote_reference]]
            for r in old_foot_refs:
                old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
            for newf in new_foot_refs:
                refname = newf.get('refname')
                refs = old_foot_namerefs.get(refname, [])
                if not refs:
                    continue

                oldf = refs.pop(0)
                newf['ids'] = oldf['ids']
                for id in newf['ids']:
                    self.document.ids[id] = newf

                if newf['auto'] == 1:
                    # autofootnote_refs
                    list_replace_or_append(self.document.autofootnote_refs, oldf, newf)
                else:
                    # symbol_footnote_refs
                    list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf)

                if refname:
                    footnote_refs = self.document.footnote_refs.setdefault(refname, [])
                    list_replace_or_append(footnote_refs, oldf, newf)

                    refnames = self.document.refnames.setdefault(refname, [])
                    list_replace_or_append(refnames, oldf, newf)

            # reference should use new (translated) 'refname'.
            # * reference target ".. _Python: ..." is not translatable.
            # * use translated refname for section refname.
            # * inline reference "`Python <...>`_" has no 'refname'.
            is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any)
            old_refs = node.traverse(is_refnamed_ref)  # type: List[nodes.reference]
            new_refs = patch.traverse(is_refnamed_ref)  # type: List[nodes.reference]
            if len(old_refs) != len(new_refs):
                old_ref_rawsources = [ref.rawsource for ref in old_refs]
                new_ref_rawsources = [ref.rawsource for ref in new_refs]
                logger.warning(__('inconsistent references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_ref_rawsources, new_ref_rawsources),
                               location=node)
            old_ref_names = [r['refname'] for r in old_refs]
            new_ref_names = [r['refname'] for r in new_refs]
            orphans = list(set(old_ref_names) - set(new_ref_names))
            for newr in new_refs:
                if not self.document.has_name(newr['refname']):
                    # Maybe refname is translated but target is not translated.
                    # Note: multiple translated refnames break link ordering.
                    if orphans:
                        newr['refname'] = orphans.pop(0)
                    else:
                        # orphan refnames is already empty!
                        # reference number is same in new_refs and old_refs.
                        pass

                self.document.note_refname(newr)

            # refnamed footnote should use original 'ids'.
            is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any)
            old_foot_refs = node.traverse(is_refnamed_footnote_ref)
            new_foot_refs = patch.traverse(is_refnamed_footnote_ref)
            refname_ids_map = {}  # type: Dict[str, List[str]]
            if len(old_foot_refs) != len(new_foot_refs):
                old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
                new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
                logger.warning(__('inconsistent footnote references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_foot_ref_rawsources, new_foot_ref_rawsources),
                               location=node)
            for oldf in old_foot_refs:
                refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"])
            for newf in new_foot_refs:
                refname = newf["refname"]
                if refname_ids_map.get(refname):
                    newf["ids"] = refname_ids_map[refname].pop(0)

            # citation should use original 'ids'.
            is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any)
            old_cite_refs = node.traverse(is_citation_ref)  # type: List[nodes.citation_reference]  # NOQA
            new_cite_refs = patch.traverse(is_citation_ref)  # type: List[nodes.citation_reference]  # NOQA
            refname_ids_map = {}
            if len(old_cite_refs) != len(new_cite_refs):
                old_cite_ref_rawsources = [ref.rawsource for ref in old_cite_refs]
                new_cite_ref_rawsources = [ref.rawsource for ref in new_cite_refs]
                logger.warning(__('inconsistent citation references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_cite_ref_rawsources, new_cite_ref_rawsources),
                               location=node)
            for oldc in old_cite_refs:
                refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"])
            for newc in new_cite_refs:
                refname = newc["refname"]
                if refname_ids_map.get(refname):
                    newc["ids"] = refname_ids_map[refname].pop()

            # Original pending_xref['reftarget'] contain not-translated
            # target name, new pending_xref must use original one.
            # This code restricts to change ref-targets in the translation.
            old_xrefs = node.traverse(addnodes.pending_xref)
            new_xrefs = patch.traverse(addnodes.pending_xref)
            xref_reftarget_map = {}
            if len(old_xrefs) != len(new_xrefs):
                old_xref_rawsources = [xref.rawsource for xref in old_xrefs]
                new_xref_rawsources = [xref.rawsource for xref in new_xrefs]
                logger.warning(__('inconsistent term references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_xref_rawsources, new_xref_rawsources),
                               location=node)

            def get_ref_key(node):
                # type: (addnodes.pending_xref) -> Tuple[str, str, str]
                case = node["refdomain"], node["reftype"]
                if case == ('std', 'term'):
                    return None
                else:
                    return (
                        node["refdomain"],
                        node["reftype"],
                        node['reftarget'],)

            for old in old_xrefs:
                key = get_ref_key(old)
                if key:
                    xref_reftarget_map[key] = old.attributes
            for new in new_xrefs:
                key = get_ref_key(new)
                # Copy attributes to keep original node behavior. Especially
                # copying 'reftarget', 'py:module', 'py:class' are needed.
                for k, v in xref_reftarget_map.get(key, {}).items():
                    # Note: This implementation overwrite all attributes.
                    # if some attributes `k` should not be overwritten,
                    # you should provide exclude list as:
                    # `if k not in EXCLUDE_LIST: new[k] = v`
                    new[k] = v

            # update leaves
            for child in patch.children:
                child.parent = node
            node.children = patch.children

            # for highlighting that expects .rawsource and .astext() are same.
            if isinstance(node, LITERAL_TYPE_NODES):
                node.rawsource = node.astext()

            if isinstance(node, IMAGE_TYPE_NODES):
                node.update_all_atts(patch)

            node['translated'] = True  # to avoid double translation

        if 'index' in self.config.gettext_additional_targets:
            # Extract and translate messages for index entries.
            for node, entries in traverse_translatable_index(self.document):
                new_entries = []   # type: List[Tuple[str, str, str, str, str]]
                for type, msg, tid, main, key_ in entries:
                    msg_parts = split_index_msg(type, msg)
                    msgstr_parts = []
                    for part in msg_parts:
                        msgstr = catalog.gettext(part)
                        if not msgstr:
                            msgstr = part
                        msgstr_parts.append(msgstr)

                    new_entries.append((type, ';'.join(msgstr_parts), tid, main, None))

                node['raw_entries'] = entries
                node['entries'] = new_entries

        # remove translated attribute that is used for avoiding double translation.
        for translated in self.document.traverse(NodeMatcher(translated=Any)):  # type: nodes.Element  # NOQA
            translated.delattr('translated')
    def write_doc(self, docname, doctree):
        catalog = self.catalogs[docname.split(SEP, 1)[0]]

        for node, msg in extract_messages(doctree):
            catalog.setdefault(node.uid, msg)
Beispiel #13
0
def test_extract_messages(rst, node_cls, count):
    msg = extract_messages(_get_doctree(dedent(rst)))
    assert_node_count(msg, node_cls, count)
Beispiel #14
0
    def apply(self):
        env = self.document.settings.env
        settings, source = self.document.settings, self.document['source']
        # XXX check if this is reliable
        assert source.startswith(env.srcdir)
        docname = path.splitext(relative_path(env.srcdir, source))[0]
        textdomain = find_catalog(docname,
                                  self.document.settings.gettext_compact)

        # fetch translations
        dirs = [path.join(env.srcdir, directory)
                for directory in env.config.locale_dirs]
        catalog, has_catalog = init_locale(dirs, env.config.language,
                                           textdomain)
        if not has_catalog:
            return

        parser = RSTParser()

        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg: # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            patch = new_document(source, settings)
            CustomLocaleReporter(node.source, node.line).set_reporter(patch)
            parser.parse(msgstr, patch)
            patch = patch[0]
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue # skip for now

            # auto-numbered foot note reference should use original 'ids'.
            def is_autonumber_footnote_ref(node):
                return isinstance(node, nodes.footnote_reference) and \
                    node.get('auto') == 1
            old_foot_refs = node.traverse(is_autonumber_footnote_ref)
            new_foot_refs = patch.traverse(is_autonumber_footnote_ref)
            if len(old_foot_refs) != len(new_foot_refs):
                env.warn_node('inconsistent footnote references in '
                              'translated message', node)
            for old, new in zip(old_foot_refs, new_foot_refs):
                new['ids'] = old['ids']
                for id in new['ids']:
                    self.document.ids[id] = new
                self.document.autofootnote_refs.remove(old)
                self.document.note_autofootnote_ref(new)

            # reference should use original 'refname'.
            # * reference target ".. _Python: ..." is not translatable.
            # * section refname is not translatable.
            # * inline reference "`Python <...>`_" has no 'refname'.
            def is_refnamed_ref(node):
                return isinstance(node, nodes.reference) and  \
                    'refname' in node
            old_refs = node.traverse(is_refnamed_ref)
            new_refs = patch.traverse(is_refnamed_ref)
            applied_refname_map = {}
            if len(old_refs) != len(new_refs):
                env.warn_node('inconsistent references in '
                              'translated message', node)
            for new in new_refs:
                if new['refname'] in applied_refname_map:
                    # 2nd appearance of the reference
                    new['refname'] = applied_refname_map[new['refname']]
                elif old_refs:
                    # 1st appearance of the reference in old_refs
                    old = old_refs.pop(0)
                    refname = old['refname']
                    new['refname'] = refname
                    applied_refname_map[new['refname']] = refname
                else:
                    # the reference is not found in old_refs
                    applied_refname_map[new['refname']] = new['refname']

                self.document.note_refname(new)

            # refnamed footnote and citation should use original 'ids'.
            def is_refnamed_footnote_ref(node):
                footnote_ref_classes = (nodes.footnote_reference,
                                        nodes.citation_reference)
                return isinstance(node, footnote_ref_classes) and \
                    'refname' in node
            old_refs = node.traverse(is_refnamed_footnote_ref)
            new_refs = patch.traverse(is_refnamed_footnote_ref)
            refname_ids_map = {}
            if len(old_refs) != len(new_refs):
                env.warn_node('inconsistent references in '
                              'translated message', node)
            for old in old_refs:
                refname_ids_map[old["refname"]] = old["ids"]
            for new in new_refs:
                refname = new["refname"]
                if refname in refname_ids_map:
                    new["ids"] = refname_ids_map[refname]

            # Original pending_xref['reftarget'] contain not-translated
            # target name, new pending_xref must use original one.
            # This code restricts to change ref-targets in the translation.
            old_refs = node.traverse(addnodes.pending_xref)
            new_refs = patch.traverse(addnodes.pending_xref)
            xref_reftarget_map = {}
            if len(old_refs) != len(new_refs):
                env.warn_node('inconsistent term references in '
                              'translated message', node)
            for old in old_refs:
                key = old["reftype"], old["refdomain"]
                xref_reftarget_map[key] = old["reftarget"]
            for new in new_refs:
                key = new["reftype"], new["refdomain"]
                if key in xref_reftarget_map:
                    new['reftarget'] = xref_reftarget_map[key]

            # update leaves
            for child in patch.children:
                child.parent = node
            node.children = patch.children

        # Extract and translate messages for index entries.
        for node, entries in traverse_translatable_index(self.document):
            new_entries = []
            for type, msg, tid, main in entries:
                msg_parts = split_index_msg(type, msg)
                msgstr_parts = []
                for part in msg_parts:
                    msgstr = catalog.gettext(part)
                    if not msgstr:
                        msgstr = part
                    msgstr_parts.append(msgstr)

                new_entries.append((type, ';'.join(msgstr_parts), tid, main))

            node['raw_entries'] = entries
            node['entries'] = new_entries
Beispiel #15
0
    def apply(self, **kwargs: Any) -> None:
        settings, source = self.document.settings, self.document['source']
        msgstr = ''

        # XXX check if this is reliable
        assert source.startswith(self.env.srcdir)
        docname = path.splitext(relative_path(path.join(self.env.srcdir, 'dummy'),
                                              source))[0]
        textdomain = docname_to_domain(docname, self.config.gettext_compact)

        # fetch translations
        dirs = [path.join(self.env.srcdir, directory)
                for directory in self.config.locale_dirs]
        catalog, has_catalog = init_locale(dirs, self.config.language, textdomain)
        if not has_catalog:
            return

        # phase1: replace reference ids with translated names
        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg or not msgstr.strip():
                # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            patch = publish_msgstr(self.app, msgstr, source,
                                   node.line, self.config, settings)
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue  # skip for now

            processed = False  # skip flag

            # update title(section) target name-id mapping
            if isinstance(node, nodes.title) and isinstance(node.parent, nodes.section):
                section_node = node.parent
                new_name = nodes.fully_normalize_name(patch.astext())
                old_name = nodes.fully_normalize_name(node.astext())

                if old_name != new_name:
                    # if name would be changed, replace node names and
                    # document nameids mapping with new name.
                    names = section_node.setdefault('names', [])
                    names.append(new_name)
                    # Original section name (reference target name) should be kept to refer
                    # from other nodes which is still not translated or uses explicit target
                    # name like "`text to display <explicit target name_>`_"..
                    # So, `old_name` is still exist in `names`.

                    _id = self.document.nameids.get(old_name, None)
                    explicit = self.document.nametypes.get(old_name, None)

                    # * if explicit: _id is label. title node need another id.
                    # * if not explicit:
                    #
                    #   * if _id is None:
                    #
                    #     _id is None means:
                    #
                    #     1. _id was not provided yet.
                    #
                    #     2. _id was duplicated.
                    #
                    #        old_name entry still exists in nameids and
                    #        nametypes for another duplicated entry.
                    #
                    #   * if _id is provided: bellow process
                    if _id:
                        if not explicit:
                            # _id was not duplicated.
                            # remove old_name entry from document ids database
                            # to reuse original _id.
                            self.document.nameids.pop(old_name, None)
                            self.document.nametypes.pop(old_name, None)
                            self.document.ids.pop(_id, None)

                        # re-entry with new named section node.
                        #
                        # Note: msgnode that is a second parameter of the
                        # `note_implicit_target` is not necessary here because
                        # section_node has been noted previously on rst parsing by
                        # `docutils.parsers.rst.states.RSTState.new_subsection()`
                        # and already has `system_message` if needed.
                        self.document.note_implicit_target(section_node)

                    # replace target's refname to new target name
                    matcher = NodeMatcher(nodes.target, refname=old_name)
                    for old_target in self.document.traverse(matcher):  # type: nodes.target
                        old_target['refname'] = new_name

                    processed = True

            # glossary terms update refid
            if isinstance(node, nodes.term):
                for _id in node['ids']:
                    parts = split_term_classifiers(msgstr)
                    patch = publish_msgstr(self.app, parts[0], source,
                                           node.line, self.config, settings)
                    patch = make_glossary_term(self.env, patch, parts[1],
                                               source, node.line, _id,
                                               self.document)
                    processed = True

            # update leaves with processed nodes
            if processed:
                for child in patch.children:
                    child.parent = node
                node.children = patch.children
                node['translated'] = True  # to avoid double translation

        # phase2: translation
        for node, msg in extract_messages(self.document):
            if node.get('translated', False):  # to avoid double translation
                continue  # skip if the node is already translated by phase1

            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg:  # as-of-yet untranslated
                continue

            # update translatable nodes
            if isinstance(node, addnodes.translatable):
                node.apply_translated_message(msg, msgstr)
                continue

            # update meta nodes
            if isinstance(node, nodes.pending) and is_pending_meta(node):
                node.details['nodes'][0]['content'] = msgstr
                continue

            if isinstance(node, nodes.image) and node.get('alt') == msg:
                node['alt'] = msgstr
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            # Structural Subelements phase1
            # There is a possibility that only the title node is created.
            # see: https://docutils.sourceforge.io/docs/ref/doctree.html#structural-subelements
            if isinstance(node, nodes.title):
                # This generates: <section ...><title>msgstr</title></section>
                msgstr = msgstr + '\n' + '=' * len(msgstr) * 2

            patch = publish_msgstr(self.app, msgstr, source,
                                   node.line, self.config, settings)

            # Structural Subelements phase2
            if isinstance(node, nodes.title):
                # get <title> node that placed as a first child
                patch = patch.next_node()

            # ignore unexpected markups in translation message
            unexpected: Tuple[Type[Element], ...] = (
                nodes.paragraph,    # expected form of translation
                nodes.title         # generated by above "Subelements phase2"
            )

            # following types are expected if
            # config.gettext_additional_targets is configured
            unexpected += LITERAL_TYPE_NODES
            unexpected += IMAGE_TYPE_NODES

            if not isinstance(patch, unexpected):
                continue  # skip

            # auto-numbered foot note reference should use original 'ids'.
            def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
                if old in lst:
                    lst[lst.index(old)] = new
                else:
                    lst.append(new)

            is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any)
            old_foot_refs: List[nodes.footnote_reference] = node.traverse(is_autofootnote_ref)
            new_foot_refs: List[nodes.footnote_reference] = patch.traverse(is_autofootnote_ref)
            if len(old_foot_refs) != len(new_foot_refs):
                old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
                new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
                logger.warning(__('inconsistent footnote references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_foot_ref_rawsources, new_foot_ref_rawsources),
                               location=node)
            old_foot_namerefs: Dict[str, List[nodes.footnote_reference]] = {}
            for r in old_foot_refs:
                old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
            for newf in new_foot_refs:
                refname = newf.get('refname')
                refs = old_foot_namerefs.get(refname, [])
                if not refs:
                    newf.parent.remove(newf)
                    continue

                oldf = refs.pop(0)
                newf['ids'] = oldf['ids']
                for id in newf['ids']:
                    self.document.ids[id] = newf

                if newf['auto'] == 1:
                    # autofootnote_refs
                    list_replace_or_append(self.document.autofootnote_refs, oldf, newf)
                else:
                    # symbol_footnote_refs
                    list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf)

                if refname:
                    footnote_refs = self.document.footnote_refs.setdefault(refname, [])
                    list_replace_or_append(footnote_refs, oldf, newf)

                    refnames = self.document.refnames.setdefault(refname, [])
                    list_replace_or_append(refnames, oldf, newf)

            # reference should use new (translated) 'refname'.
            # * reference target ".. _Python: ..." is not translatable.
            # * use translated refname for section refname.
            # * inline reference "`Python <...>`_" has no 'refname'.
            is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any)
            old_refs: List[nodes.reference] = node.traverse(is_refnamed_ref)
            new_refs: List[nodes.reference] = patch.traverse(is_refnamed_ref)
            if len(old_refs) != len(new_refs):
                old_ref_rawsources = [ref.rawsource for ref in old_refs]
                new_ref_rawsources = [ref.rawsource for ref in new_refs]
                logger.warning(__('inconsistent references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_ref_rawsources, new_ref_rawsources),
                               location=node)
            old_ref_names = [r['refname'] for r in old_refs]
            new_ref_names = [r['refname'] for r in new_refs]
            orphans = list(set(old_ref_names) - set(new_ref_names))
            for newr in new_refs:
                if not self.document.has_name(newr['refname']):
                    # Maybe refname is translated but target is not translated.
                    # Note: multiple translated refnames break link ordering.
                    if orphans:
                        newr['refname'] = orphans.pop(0)
                    else:
                        # orphan refnames is already empty!
                        # reference number is same in new_refs and old_refs.
                        pass

                self.document.note_refname(newr)

            # refnamed footnote should use original 'ids'.
            is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any)
            old_foot_refs = node.traverse(is_refnamed_footnote_ref)
            new_foot_refs = patch.traverse(is_refnamed_footnote_ref)
            refname_ids_map: Dict[str, List[str]] = {}
            if len(old_foot_refs) != len(new_foot_refs):
                old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
                new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
                logger.warning(__('inconsistent footnote references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_foot_ref_rawsources, new_foot_ref_rawsources),
                               location=node)
            for oldf in old_foot_refs:
                refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"])
            for newf in new_foot_refs:
                refname = newf["refname"]
                if refname_ids_map.get(refname):
                    newf["ids"] = refname_ids_map[refname].pop(0)

            # citation should use original 'ids'.
            is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any)
            old_cite_refs: List[nodes.citation_reference] = node.traverse(is_citation_ref)
            new_cite_refs: List[nodes.citation_reference] = patch.traverse(is_citation_ref)
            refname_ids_map = {}
            if len(old_cite_refs) != len(new_cite_refs):
                old_cite_ref_rawsources = [ref.rawsource for ref in old_cite_refs]
                new_cite_ref_rawsources = [ref.rawsource for ref in new_cite_refs]
                logger.warning(__('inconsistent citation references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_cite_ref_rawsources, new_cite_ref_rawsources),
                               location=node)
            for oldc in old_cite_refs:
                refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"])
            for newc in new_cite_refs:
                refname = newc["refname"]
                if refname_ids_map.get(refname):
                    newc["ids"] = refname_ids_map[refname].pop()

            # Original pending_xref['reftarget'] contain not-translated
            # target name, new pending_xref must use original one.
            # This code restricts to change ref-targets in the translation.
            old_xrefs = node.traverse(addnodes.pending_xref)
            new_xrefs = patch.traverse(addnodes.pending_xref)
            xref_reftarget_map = {}
            if len(old_xrefs) != len(new_xrefs):
                old_xref_rawsources = [xref.rawsource for xref in old_xrefs]
                new_xref_rawsources = [xref.rawsource for xref in new_xrefs]
                logger.warning(__('inconsistent term references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_xref_rawsources, new_xref_rawsources),
                               location=node)

            def get_ref_key(node: addnodes.pending_xref) -> Optional[Tuple[str, str, str]]:
                case = node["refdomain"], node["reftype"]
                if case == ('std', 'term'):
                    return None
                else:
                    return (
                        node["refdomain"],
                        node["reftype"],
                        node['reftarget'],)
def doctree_resolved(app, doctree, docname):
    def abbreviating():
        remove_items = []
        new_items = {}

        for k, v in trans_finder.master_dic.items():
            ref_list = RefList(msg=v)
            new_v = ref_list.quotedToAbbrev(k)
            has_new_v = (new_v is not None) and (len(new_v) > 0)
            if has_new_v:
                new_entry = {k: new_v}
                new_items.update(new_entry)

        has_remove_items = (len(remove_items) > 0)
        if has_remove_items:
            for k in remove_items:
                dd(f'Delete from dictionary:[{k}]')
                del trans_finder.master_dic[k]

        is_writing_changes = (len(new_items) > 0)
        if is_writing_changes:
            trans_finder.master_dic.update(new_items)
            dic_file = '/Users/hoangduytran/blender_manual/test_dic.json'
            print(
                f'Writing changes to: {dic_file}, number of records:{len(new_items)}'
            )
            trans_finder.writeJSONDic(dict_list=trans_finder.master_dic,
                                      file_name=dic_file)

    def checkDictForMultipleMeaningsInTrans():
        pattern = re.compile(r'(\w+(/\w+)+)')
        k: str = None
        v: str = None
        for k, v in trans_finder.master_dic.items():
            k_found_list = pattern.findall(k)
            v_found_list = pattern.findall(v)

            is_same = (len(k_found_list) == len(v_found_list))
            if is_same:
                continue

            k_is_single_word = (len(k.split(' ')) == 1)
            if k_is_single_word:
                continue

            PP(k_found_list)
            print('-' * 3)
            PP(v_found_list)
            print('-' * 3)
            print(f'{k}\n\n{v}')
            print('-' * 40)

    def trimmingText(text):
        txt_has_trimmable_ending = (cm.TRIMMABLE_ENDING.search(text)
                                    is not None)
        txt_has_trimmable_beginning = (cm.TRIMMABLE_BEGINNING.search(text)
                                       is not None)
        is_trim = (txt_has_trimmable_ending or txt_has_trimmable_beginning)
        if not is_trim:
            return text, False
        text = cm.TRIMMABLE_BEGINNING.sub('', text)
        text = cm.TRIMMABLE_ENDING.sub('', text)
        return text, True

    def removeDictBeginAndEndingPuncts():
        remove_set = {}
        add_set = {}

        for k, v in trans_finder.master_dic.items():
            # is_debug = ('Cut to' in k)
            # if is_debug:
            #     dd('DEBUG')

            trimmed_k, is_trimmed_k = trimmingText(k)
            trimmed_v, is_trimmed_v = trimmingText(v)

            changed = (is_trimmed_k or is_trimmed_v)
            if changed:
                remove_entry = {k: v}
                remove_set.update(remove_entry)

                add_entry = {trimmed_k: trimmed_v}
                add_set.update(add_entry)

            print(f'[{k}]')
            print(f'[{trimmed_k}]')
            print('-' * 3)
            print(f'[{v}]')
            print(f'[{trimmed_v}]')
            print('-' * 40)

        changed = False
        for k, v in remove_set.items():
            remove_entry = {k: v}
            print(f'remove:{remove_entry}')
            del trans_finder.master_dic[k]
            changed = True

        for k, v in add_set.items():
            add_entry = {k: v}
            trans_finder.master_dic.update(add_entry)
            print(f'added: {add_entry}')
            changed = True

        if changed:
            new_dict = cleanupLeadingTrailingPunct(trans_finder.master_dic)
            test_to_file = '/Users/hoangduytran/blender_manual/ref_dict_0005.json'
            trans_finder.writeJSONDic(dict_list=new_dict,
                                      file_name=test_to_file)

    def removeDuplication(txt_with_punct):
        # is_debug = (txt_with_punct.endswith('::'))
        # if is_debug:
        #     dd('DEBUG')
        cropped_txt, begin_with_punctuations, ending_with_punctuations = cm.beginAndEndPunctuation(
            txt_with_punct, is_single=True)
        trans = trans_finder.isInList(cropped_txt)
        is_repeat = (trans is not None)
        if not is_repeat:
            cropped_txt, begin_with_punctuations, ending_with_punctuations = cm.beginAndEndPunctuation(
                txt_with_punct, is_single=False)
            trans = trans_finder.isInList(cropped_txt)

        is_repeat = (trans is not None)
        return is_repeat

    def cleanupLeadingTrailingPunct(d_dict):
        return_dict = {}
        for k, v in d_dict.items():
            trimmed_k = str(k)
            trimmed_v = str(v)
            found_k = cm.WORD_WITHOUT_QUOTE.search(k)
            found_v = cm.WORD_WITHOUT_QUOTE.search(v)
            if found_k:
                trimmed_k = found_k.group(1)
            if found_v:
                trimmed_v = found_v.group(1)
            entry = {trimmed_k: trimmed_v}
            return_dict.update(entry)
        return return_dict

    def refToDictItems(ref_list):
        ref_dict = {}
        ref: RefRecord = None
        interest_ref = [
            RefType.REF,
            RefType.DOC,
            RefType.GA,
            RefType.TERM,
        ]
        for ref in ref_list:
            # print(ref)
            type = ref.getOrigin().getRefType()
            first_ref = ref.getRefItemByIndex(0)
            ref_text = first_ref.getText()

            is_debug = ('Poor mans steadycam' in ref_text)
            if is_debug:
                dd('DEBUG')

            en_part = None
            vn_part = None
            d_dict = {}
            if type == RefType.MENUSELECTION:
                print(f'MENUSELECTION:{type}')
                text_list = cm.MENU_TEXT_REVERSE.findall(ref_text)
                length = len(text_list)
                i_index = 0
                for i in range(length):
                    tran = text_list[i_index]
                    if i_index + 1 < length:
                        orig = text_list[i_index + 1]
                    else:
                        print('ERROR: Orig is NOT THERE, use original')
                        orig = ref.getOrigin().getText()

                    entry = {orig: tran}
                    print(f'menu:{entry}')
                    d_dict.update(entry)
                    i_index += 2
                    if i_index >= length:
                        break

            elif type == RefType.ABBR:
                print(f'ABBR:{type}')
                text_list = cm.ABBREV_TEXT_REVERSE.findall(ref_text)
                abbr = text_list[0]
                defin = text_list[1]
                has_further_explanation = (': ' in defin)
                if has_further_explanation:
                    exp_list = defin.split(': ')
                    orig_part = exp_list[0]
                    further_exp = exp_list[1]
                    print(
                        f'abbr:{abbr}; orig_part:{orig_part}; further_exp:{further_exp}'
                    )

                    if abbr.isascii():
                        entry = {abbr: f'{orig_part}, {further_exp}'}
                    elif orig_part.isascii():
                        entry = {orig_part: f'{further_exp}, {abbr}'}
                    else:
                        entry = {further_exp: f'{orig_part}, {abbr}'}
                    d_dict.update(entry)
                else:
                    print(f'abbr:{abbr}; defin:{defin}')
                    if defin.isascii():
                        entry = {defin: abbr}
                    else:
                        entry = {abbr: defin}
                    d_dict.update(entry)

            elif type in interest_ref:
                print(f'GENERIC_REF:{type}')
                text_list = cm.REF_TEXT_REVERSE.findall(ref_text)
                has_text = (len(text_list) > 0)
                if not has_text:
                    origin_text = ref.getOrigin().getText()
                    print(f'ERROR: origin_text:{origin_text}')
                    # print(f'{text_list}, appeared to be empty!!!')
                else:
                    vn_part, en_part = text_list[0]
                    print(f'en_part:{en_part} vn_part:{vn_part}')
                    entry = {en_part: vn_part}
                    d_dict.update(entry)
            else:
                dd(f'{type} is not the type we are looking for.')
            ref_dict.update(d_dict)

        return_dict = cleanupLeadingTrailingPunct(d_dict)

        return return_dict

    def listDictRefsToDict():
        interest_ref_list = [
            RefType.MENUSELECTION,
            RefType.REF,
            RefType.DOC,
            RefType.GA,
            RefType.TERM,
            RefType.ABBR,
        ]

        ref_dict = {}
        ref_dict_filename = '/Users/hoangduytran/blender_manual/ref_dict_refsonly.json'
        for k, v in trans_finder.master_dic.items():
            ref_list = RefList(msg=v, keep_orig=False, tf=trans_finder)
            ref_list.parseMessage()

            inter_ref_list = ref_list.getListOfRefType(interest_ref_list)
            has_ref = (len(inter_ref_list) > 0)
            if not has_ref:
                continue

            current_ref_dict = refToDictItems(inter_ref_list)
            ref_dict.update(current_ref_dict)

        has_dict_content = (len(ref_dict) > 0)
        if has_dict_content:
            trans_finder.writeJSONDic(dict_list=ref_dict,
                                      file_name=ref_dict_filename)

    def tranRef(msg, is_keep_original):
        ref_list = RefList(msg=msg,
                           keep_orig=is_keep_original,
                           tf=trans_finder)
        ref_list.parseMessage()
        ref_list.translateRefList()
        tran = ref_list.getTranslation()
        # trans_finder.addDictEntry((msg, tran))
        # print("Got translation from REF_LIST")
        return tran

    # def fuzzyTextSimilar(txt1 : str, txt2 : str, accept_ratio):
    #     try:
    #         similar_ratio = LE.ratio(txt1, txt2)
    #         is_similar = (similar_ratio >= accept_ratio)
    #         return is_similar
    #     except Exception as e:
    #         print(e)
    #         return False

    def getTimeNow(self):
        local_time = timezone('Europe/London')
        fmt = '%Y-%m-%d %H:%M%z'
        loc_dt = local_time.localize(datetime.datetime.now())
        formatted_dt = loc_dt.strftime(fmt)
        return formatted_dt

    # is_running = runAppOrNot()
    # if not is_running:
    #     return

    # correctingDictionary()
    # checkDictKeyboard()
    # checkDictRef()
    # checkNonTranslatedDictWords()
    # checkDictForMultipleMeaningsInTrans()
    # removeDictBeginAndEndingPuncts()
    # listDictRefsToDict()
    # trans_finder.saveMasterDict()
    # exit(0)

    try:

        is_debug = ('vr_scene_inspection' in docname)
        if is_debug:
            dd('DEBUG')

        ex_env_key = 'EX_PO_TRANS'
        is_ex_env_set = (ex_env_key in os.environ)
        if not is_ex_env_set:
            return
        ex_env_key_value = os.environ[ex_env_key]
        is_ex_set_true = (ex_env_key_value.lower() == 'true')
        if not is_ex_set_true:
            return

        debug_file = cm.debug_file
        if debug_file:
            is_debug_file = (debug_file in docname)
            if not is_debug_file:
                return

        build_dir = "build/rstdoc"
        po_vi_dir = "locale/vi/LC_MESSAGES"

        po_file_path = "{}.po".format(docname)
        local_path = os.path.dirname(os.path.abspath(__file__))
        blender_docs_path = cm.BLENDER_DOCS  # os.path.dirname(local_path)

        locale_vi_path = "locale/vi/LC_MESSAGES"

        po_path = os.path.join(blender_docs_path,
                               os.path.join(locale_vi_path, po_file_path))

        if not os.path.isfile(po_path):
            msg = f'po_path: {po_path} NOT FOUND!'
            print(msg)
            raise Exception(msg)
            exit(0)

        # #loading local po file to get translation if any
        po_dic, current_po_cat = trans_finder.loadPOAsDic(po_path)
        trans_finder.flatPOFile(po_path)

        rst_output_location = os.path.join(blender_docs_path, build_dir)
        output_path = os.path.join(rst_output_location, po_file_path)

        local_time = timezone(TIME_ZONE)
        time_now = local_time.localize(datetime.datetime.now())

        local_locale = locale.getlocale()[0]
        current_header = current_po_cat._get_header_comment()
        new_po_cat = Catalog(locale="vi",
                             header_comment=current_header,
                             project=current_po_cat.project,
                             version=current_po_cat.version,
                             copyright_holder=YOUR_ID,
                             creation_date=current_po_cat.creation_date,
                             revision_date=time_now,
                             last_translator=YOUR_ID,
                             language_team=YOUR_TRANSLATION_TEAM)

        dd("#" * 80)
        dd("filename: {}".format(output_path))

        # msgid = "Lines should be less than 120 characters long."
        # msgstr = "Số chữ trong các dòng phải ít hơn 120 ký tự de lam gi."
        # trans_finder.addDictEntry((msgid, msgstr), False)
        # exit(0)

        for node, msg in extract_messages(doctree):
            msg = msg.strip()
            dd("=" * 80)
            dd("msgid:[{}]".format(msg))

            # clean up po file

            is_inline = isinstance(node, nodes.inline)
            is_emphasis = isinstance(node, nodes.emphasis)
            is_title = isinstance(node, nodes.title)
            is_term = isinstance(node, nodes.term)
            is_rubric = isinstance(node, nodes.rubric)
            is_field_name = isinstance(node, nodes.field_name)
            is_reference = isinstance(node, nodes.reference)
            is_strong = isinstance(node, nodes.strong)

            is_keep_original = (is_inline or is_emphasis or is_title or is_term
                                or is_rubric or is_field_name or is_reference
                                or is_strong)

            tran = None
            # is_debug = ('Get involved in discussions' in msg)
            # if is_debug:
            #     dd('DEBUG')
            is_ignore = ig.isIgnored(msg)
            if is_ignore:
                print(f'IGNORED: {msg}')
                continue

            # is_added = False
            tran, is_ignore = trans_finder.findTranslation(msg)
            if is_ignore:
                continue

            has_translation = (tran is not None)
            if not has_translation:
                is_debug = ('is based on the OpenXR specification' in msg)
                if is_debug:
                    dd('Debug')

                ref_list = RefList(msg=msg,
                                   keep_orig=is_keep_original,
                                   tf=trans_finder)
                ref_list.parseMessage()
                ref_list.translateRefList()
                tran = ref_list.getTranslation()
                # tran = tranRef(msg, is_keep_original)
                has_translation = (tran is not None)
                if not has_translation:
                    tran = po_dic[msg]

            has_translation = (tran is not None)
            if has_translation:
                has_month = ('Tháng ' in tran)
                has_original = (msg.lower() in tran.lower())
                has_link = (cm.REF_LINK.search(tran) is not None)
                can_ignore = (has_month or has_original or has_link)
                is_repeat = is_keep_original and not can_ignore
                if is_repeat:
                    print('Repeating MSG')
                    tran = cm.matchCase(msg, tran)
                    tran = f'{tran} -- {msg}'
                    print(f'Repeating MSG:{tran}')

            if tran is not None:
                new_po_cat.add(msg, string=tran)
            else:
                new_po_cat.add(msg, string="")

            print(f'msgid \"{msg}\"')
            if tran is not None:
                print(f'msgstr \"{tran}\"')
            else:
                print('msgstr \"\"')

        print("Output to the path:", new_po_cat, output_path)
        c.dump_po(output_path, new_po_cat)
        # dd('DEBUG')
    except Exception as e:
        df.LOG(f'{e}', error=True)
Beispiel #17
0
    def apply(self):
        env = self.document.settings.env
        settings, source = self.document.settings, self.document['source']
        # XXX check if this is reliable
        assert source.startswith(env.srcdir)
        docname = path.splitext(relative_path(env.srcdir, source))[0]
        textdomain = find_catalog(docname,
                                  self.document.settings.gettext_compact)

        # fetch translations
        dirs = [
            path.join(env.srcdir, directory)
            for directory in env.config.locale_dirs
        ]
        catalog, has_catalog = init_locale(dirs, env.config.language,
                                           textdomain)
        if not has_catalog:
            return

        parser = RSTParser()

        #phase1: replace reference ids with translated names
        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg or not msgstr.strip():
                # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            patch = new_document(source, settings)
            CustomLocaleReporter(node.source, node.line).set_reporter(patch)
            parser.parse(msgstr, patch)
            patch = patch[0]
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue  # skip for now

            processed = False  # skip flag

            # update title(section) target name-id mapping
            if isinstance(node, nodes.title):
                section_node = node.parent
                new_name = nodes.fully_normalize_name(patch.astext())
                old_name = nodes.fully_normalize_name(node.astext())

                if old_name != new_name:
                    # if name would be changed, replace node names and
                    # document nameids mapping with new name.
                    names = section_node.setdefault('names', [])
                    names.append(new_name)
                    if old_name in names:
                        names.remove(old_name)

                    _id = self.document.nameids.get(old_name, None)
                    explicit = self.document.nametypes.get(old_name, None)

                    # * if explicit: _id is label. title node need another id.
                    # * if not explicit:
                    #
                    #   * _id is None:
                    #
                    #     _id is None means _id was duplicated.
                    #     old_name entry still exists in nameids and
                    #     nametypes for another duplicated entry.
                    #
                    #   * _id is provided: bellow process
                    if not explicit and _id:
                        # _id was not duplicated.
                        # remove old_name entry from document ids database
                        # to reuse original _id.
                        self.document.nameids.pop(old_name, None)
                        self.document.nametypes.pop(old_name, None)
                        self.document.ids.pop(_id, None)

                    # re-entry with new named section node.
                    self.document.note_implicit_target(section_node,
                                                       section_node)

                    # replace target's refname to new target name
                    def is_named_target(node):
                        return isinstance(node, nodes.target) and  \
                            node.get('refname') == old_name

                    for old_target in self.document.traverse(is_named_target):
                        old_target['refname'] = new_name

                    processed = True

            # glossary terms update refid
            if isinstance(node, nodes.term):
                gloss_entries = env.temp_data.setdefault(
                    'gloss_entries', set())
                ids = []
                termnodes = []
                for _id in node['names']:
                    if _id in gloss_entries:
                        gloss_entries.remove(_id)
                    _id, _, new_termnodes = \
                        make_termnodes_from_paragraph_node(env, patch, _id)
                    ids.append(_id)
                    termnodes.extend(new_termnodes)

                if termnodes and ids:
                    patch = make_term_from_paragraph_node(termnodes, ids)
                    node['ids'] = patch['ids']
                    node['names'] = patch['names']
                    processed = True

            # update leaves with processed nodes
            if processed:
                for child in patch.children:
                    child.parent = node
                node.children = patch.children
                node['translated'] = True

        #phase2: translation
        for node, msg in extract_messages(self.document):
            if node.get('translated', False):
                continue

            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg:  # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            patch = new_document(source, settings)
            CustomLocaleReporter(node.source, node.line).set_reporter(patch)
            parser.parse(msgstr, patch)
            patch = patch[0]
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue  # skip for now

            # auto-numbered foot note reference should use original 'ids'.
            def is_autonumber_footnote_ref(node):
                return isinstance(node, nodes.footnote_reference) and \
                    node.get('auto') == 1

            def list_replace_or_append(lst, old, new):
                if old in lst:
                    lst[lst.index(old)] = new
                else:
                    lst.append(new)

            old_foot_refs = node.traverse(is_autonumber_footnote_ref)
            new_foot_refs = patch.traverse(is_autonumber_footnote_ref)
            if len(old_foot_refs) != len(new_foot_refs):
                env.warn_node(
                    'inconsistent footnote references in '
                    'translated message', node)
            old_foot_namerefs = {}
            for r in old_foot_refs:
                old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
            for new in new_foot_refs:
                refname = new.get('refname')
                refs = old_foot_namerefs.get(refname, [])
                if not refs:
                    continue

                old = refs.pop(0)
                new['ids'] = old['ids']
                for id in new['ids']:
                    self.document.ids[id] = new
                list_replace_or_append(self.document.autofootnote_refs, old,
                                       new)
                if refname:
                    list_replace_or_append(
                        self.document.footnote_refs.setdefault(refname, []),
                        old, new)
                    list_replace_or_append(
                        self.document.refnames.setdefault(refname, []), old,
                        new)

            # reference should use new (translated) 'refname'.
            # * reference target ".. _Python: ..." is not translatable.
            # * use translated refname for section refname.
            # * inline reference "`Python <...>`_" has no 'refname'.
            def is_refnamed_ref(node):
                return isinstance(node, nodes.reference) and  \
                    'refname' in node

            old_refs = node.traverse(is_refnamed_ref)
            new_refs = patch.traverse(is_refnamed_ref)
            if len(old_refs) != len(new_refs):
                env.warn_node(
                    'inconsistent references in '
                    'translated message', node)
            old_ref_names = [r['refname'] for r in old_refs]
            new_ref_names = [r['refname'] for r in new_refs]
            orphans = list(set(old_ref_names) - set(new_ref_names))
            for new in new_refs:
                if not self.document.has_name(new['refname']):
                    # Maybe refname is translated but target is not translated.
                    # Note: multiple translated refnames break link ordering.
                    if orphans:
                        new['refname'] = orphans.pop(0)
                    else:
                        # orphan refnames is already empty!
                        # reference number is same in new_refs and old_refs.
                        pass

                self.document.note_refname(new)

            # refnamed footnote and citation should use original 'ids'.
            def is_refnamed_footnote_ref(node):
                footnote_ref_classes = (nodes.footnote_reference,
                                        nodes.citation_reference)
                return isinstance(node, footnote_ref_classes) and \
                    'refname' in node

            old_refs = node.traverse(is_refnamed_footnote_ref)
            new_refs = patch.traverse(is_refnamed_footnote_ref)
            refname_ids_map = {}
            if len(old_refs) != len(new_refs):
                env.warn_node(
                    'inconsistent references in '
                    'translated message', node)
            for old in old_refs:
                refname_ids_map[old["refname"]] = old["ids"]
            for new in new_refs:
                refname = new["refname"]
                if refname in refname_ids_map:
                    new["ids"] = refname_ids_map[refname]

            # Original pending_xref['reftarget'] contain not-translated
            # target name, new pending_xref must use original one.
            # This code restricts to change ref-targets in the translation.
            old_refs = node.traverse(addnodes.pending_xref)
            new_refs = patch.traverse(addnodes.pending_xref)
            xref_reftarget_map = {}
            if len(old_refs) != len(new_refs):
                env.warn_node(
                    'inconsistent term references in '
                    'translated message', node)

            def get_ref_key(node):
                case = node["refdomain"], node["reftype"]
                if case == ('std', 'term'):
                    return None
                else:
                    return (
                        node["refdomain"],
                        node["reftype"],
                        node['reftarget'],
                    )
Beispiel #18
0
    def apply(self):
        env = self.document.settings.env
        settings, source = self.document.settings, self.document['source']
        # XXX check if this is reliable
        assert source.startswith(env.srcdir)
        docname = path.splitext(relative_path(env.srcdir, source))[0]
        textdomain = find_catalog(docname,
                                  self.document.settings.gettext_compact)

        # fetch translations
        dirs = [path.join(env.srcdir, directory)
                for directory in env.config.locale_dirs]
        catalog, has_catalog = init_locale(dirs, env.config.language,
                                           textdomain)
        if not has_catalog:
            return

        parser = RSTParser()

        #phase1: replace reference ids with translated names
        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg or not msgstr.strip():
                # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            patch = new_document(source, settings)
            CustomLocaleReporter(node.source, node.line).set_reporter(patch)
            parser.parse(msgstr, patch)
            patch = patch[0]
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue # skip for now

            processed = False  # skip flag

            # update title(section) target name-id mapping
            if isinstance(node, nodes.title):
                section_node = node.parent
                new_name = nodes.fully_normalize_name(patch.astext())
                old_name = nodes.fully_normalize_name(node.astext())

                if old_name != new_name:
                    # if name would be changed, replace node names and
                    # document nameids mapping with new name.
                    names = section_node.setdefault('names', [])
                    names.append(new_name)
                    if old_name in names:
                        names.remove(old_name)

                    _id = self.document.nameids.get(old_name, None)
                    explicit = self.document.nametypes.get(old_name, None)

                    # * if explicit: _id is label. title node need another id.
                    # * if not explicit:
                    #
                    #   * _id is None:
                    #
                    #     _id is None means _id was duplicated.
                    #     old_name entry still exists in nameids and
                    #     nametypes for another duplicated entry.
                    #
                    #   * _id is provided: bellow process
                    if not explicit and _id:
                        # _id was not duplicated.
                        # remove old_name entry from document ids database
                        # to reuse original _id.
                        self.document.nameids.pop(old_name, None)
                        self.document.nametypes.pop(old_name, None)
                        self.document.ids.pop(_id, None)

                    # re-entry with new named section node.
                    self.document.note_implicit_target(
                            section_node, section_node)

                    # replace target's refname to new target name
                    def is_named_target(node):
                        return isinstance(node, nodes.target) and  \
                            node.get('refname') == old_name
                    for old_target in self.document.traverse(is_named_target):
                        old_target['refname'] = new_name

                    processed = True

            # glossary terms update refid
            if isinstance(node, nodes.term):
                gloss_entries = env.temp_data.setdefault('gloss_entries', set())
                ids = []
                termnodes = []
                for _id in node['names']:
                    if _id in gloss_entries:
                        gloss_entries.remove(_id)
                    _id, _, new_termnodes = \
                        make_termnodes_from_paragraph_node(env, patch, _id)
                    ids.append(_id)
                    termnodes.extend(new_termnodes)

                if termnodes and ids:
                    patch = make_term_from_paragraph_node(termnodes, ids)
                    node['ids'] = patch['ids']
                    node['names'] = patch['names']
                    processed = True

            # update leaves with processed nodes
            if processed:
                for child in patch.children:
                    child.parent = node
                node.children = patch.children
                node['translated'] = True


        #phase2: translation
        for node, msg in extract_messages(self.document):
            if node.get('translated', False):
                continue

            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg: # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            patch = new_document(source, settings)
            CustomLocaleReporter(node.source, node.line).set_reporter(patch)
            parser.parse(msgstr, patch)
            patch = patch[0]
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue # skip for now

            # auto-numbered foot note reference should use original 'ids'.
            def is_autonumber_footnote_ref(node):
                return isinstance(node, nodes.footnote_reference) and \
                    node.get('auto') == 1
            def list_replace_or_append(lst, old, new):
                if old in lst:
                    lst[lst.index(old)] = new
                else:
                    lst.append(new)
            old_foot_refs = node.traverse(is_autonumber_footnote_ref)
            new_foot_refs = patch.traverse(is_autonumber_footnote_ref)
            if len(old_foot_refs) != len(new_foot_refs):
                env.warn_node('inconsistent footnote references in '
                              'translated message', node)
            old_foot_namerefs = {}
            for r in old_foot_refs:
                old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
            for new in new_foot_refs:
                refname = new.get('refname')
                refs = old_foot_namerefs.get(refname, [])
                if not refs:
                    continue

                old = refs.pop(0)
                new['ids'] = old['ids']
                for id in new['ids']:
                    self.document.ids[id] = new
                list_replace_or_append(
                        self.document.autofootnote_refs, old, new)
                if refname:
                    list_replace_or_append(
                        self.document.footnote_refs.setdefault(refname, []),
                        old, new)
                    list_replace_or_append(
                        self.document.refnames.setdefault(refname, []),
                        old, new)

            # reference should use new (translated) 'refname'.
            # * reference target ".. _Python: ..." is not translatable.
            # * use translated refname for section refname.
            # * inline reference "`Python <...>`_" has no 'refname'.
            def is_refnamed_ref(node):
                return isinstance(node, nodes.reference) and  \
                    'refname' in node
            old_refs = node.traverse(is_refnamed_ref)
            new_refs = patch.traverse(is_refnamed_ref)
            if len(old_refs) != len(new_refs):
                env.warn_node('inconsistent references in '
                              'translated message', node)
            old_ref_names = [r['refname'] for r in old_refs]
            new_ref_names = [r['refname'] for r in new_refs]
            orphans = list(set(old_ref_names) - set(new_ref_names))
            for new in new_refs:
                if not self.document.has_name(new['refname']):
                    # Maybe refname is translated but target is not translated.
                    # Note: multiple translated refnames break link ordering.
                    if orphans:
                        new['refname'] = orphans.pop(0)
                    else:
                        # orphan refnames is already empty!
                        # reference number is same in new_refs and old_refs.
                        pass

                self.document.note_refname(new)

            # refnamed footnote and citation should use original 'ids'.
            def is_refnamed_footnote_ref(node):
                footnote_ref_classes = (nodes.footnote_reference,
                                        nodes.citation_reference)
                return isinstance(node, footnote_ref_classes) and \
                    'refname' in node
            old_refs = node.traverse(is_refnamed_footnote_ref)
            new_refs = patch.traverse(is_refnamed_footnote_ref)
            refname_ids_map = {}
            if len(old_refs) != len(new_refs):
                env.warn_node('inconsistent references in '
                              'translated message', node)
            for old in old_refs:
                refname_ids_map[old["refname"]] = old["ids"]
            for new in new_refs:
                refname = new["refname"]
                if refname in refname_ids_map:
                    new["ids"] = refname_ids_map[refname]

            # Original pending_xref['reftarget'] contain not-translated
            # target name, new pending_xref must use original one.
            # This code restricts to change ref-targets in the translation.
            old_refs = node.traverse(addnodes.pending_xref)
            new_refs = patch.traverse(addnodes.pending_xref)
            xref_reftarget_map = {}
            if len(old_refs) != len(new_refs):
                env.warn_node('inconsistent term references in '
                              'translated message', node)
            def get_ref_key(node):
                case = node["refdomain"], node["reftype"]
                if case == ('std', 'term'):
                    return None
                else:
                    return (
                        node["refdomain"],
                        node["reftype"],
                        node['reftarget'],)

            for old in old_refs:
                key = get_ref_key(old)
                if key:
                    xref_reftarget_map[key] = old["reftarget"]
            for new in new_refs:
                key = get_ref_key(new)
                if key in xref_reftarget_map:
                    new['reftarget'] = xref_reftarget_map[key]

            # update leaves
            for child in patch.children:
                child.parent = node
            node.children = patch.children
            node['translated'] = True

        # Extract and translate messages for index entries.
        for node, entries in traverse_translatable_index(self.document):
            new_entries = []
            for type, msg, tid, main in entries:
                msg_parts = split_index_msg(type, msg)
                msgstr_parts = []
                for part in msg_parts:
                    msgstr = catalog.gettext(part)
                    if not msgstr:
                        msgstr = part
                    msgstr_parts.append(msgstr)

                new_entries.append((type, ';'.join(msgstr_parts), tid, main))

            node['raw_entries'] = entries
            node['entries'] = new_entries
Beispiel #19
0
def test_extract_messages(rst, node_cls, count):
    msg = extract_messages(_get_doctree(dedent(rst)))
    assert_node_count(msg, node_cls, count)
Beispiel #20
0
    def apply(self):
        env = self.document.settings.env
        settings, source = self.document.settings, self.document['source']
        # XXX check if this is reliable
        assert source.startswith(env.srcdir)
        docname = path.splitext(relative_path(path.join(env.srcdir, 'dummy'),
                                              source))[0]
        textdomain = find_catalog(docname,
                                  self.document.settings.gettext_compact)

        # fetch translations
        dirs = [path.join(env.srcdir, directory)
                for directory in env.config.locale_dirs]
        catalog, has_catalog = init_locale(dirs, env.config.language,
                                           textdomain,
                                           charset=env.config.source_encoding)
        if not has_catalog:
            return

        parser = RSTParser()

        # phase1: replace reference ids with translated names
        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg or not msgstr.strip():
                # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' '*3)

            patch = new_document(source, settings)
            CustomLocaleReporter(node.source, node.line).set_reporter(patch)
            parser.parse(msgstr, patch)
            try:
                patch = patch[0]
            except IndexError:  # empty node
                pass
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue  # skip for now

            processed = False  # skip flag

            # update title(section) target name-id mapping
            if isinstance(node, nodes.title):
                section_node = node.parent
                new_name = nodes.fully_normalize_name(patch.astext())
                old_name = nodes.fully_normalize_name(node.astext())

                if old_name != new_name:
                    # if name would be changed, replace node names and
                    # document nameids mapping with new name.
                    names = section_node.setdefault('names', [])
                    names.append(new_name)
                    # Original section name (reference target name) should be kept to refer
                    # from other nodes which is still not translated or uses explicit target
                    # name like "`text to display <explicit target name_>`_"..
                    # So, `old_name` is still exist in `names`.

                    _id = self.document.nameids.get(old_name, None)
                    explicit = self.document.nametypes.get(old_name, None)

                    # * if explicit: _id is label. title node need another id.
                    # * if not explicit:
                    #
                    #   * if _id is None:
                    #
                    #     _id is None means:
                    #
                    #     1. _id was not provided yet.
                    #
                    #     2. _id was duplicated.
                    #
                    #        old_name entry still exists in nameids and
                    #        nametypes for another duplicated entry.
                    #
                    #   * if _id is provided: bellow process
                    if _id:
                        if not explicit:
                            # _id was not duplicated.
                            # remove old_name entry from document ids database
                            # to reuse original _id.
                            self.document.nameids.pop(old_name, None)
                            self.document.nametypes.pop(old_name, None)
                            self.document.ids.pop(_id, None)

                        # re-entry with new named section node.
                        #
                        # Note: msgnode that is a second parameter of the
                        # `note_implicit_target` is not necessary here because
                        # section_node has been noted previously on rst parsing by
                        # `docutils.parsers.rst.states.RSTState.new_subsection()`
                        # and already has `system_message` if needed.
                        self.document.note_implicit_target(section_node)

                    # replace target's refname to new target name
                    def is_named_target(node):
                        return isinstance(node, nodes.target) and  \
                            node.get('refname') == old_name
                    for old_target in self.document.traverse(is_named_target):
                        old_target['refname'] = new_name

                    processed = True

            # glossary terms update refid
            if isinstance(node, nodes.term):
                gloss_entries = env.temp_data.setdefault('gloss_entries', set())
                ids = []
                termnodes = []
                for _id in node['names']:
                    if _id in gloss_entries:
                        gloss_entries.remove(_id)
                    _id, _, new_termnodes = \
                        make_termnodes_from_paragraph_node(env, patch, _id)
                    ids.append(_id)
                    termnodes.extend(new_termnodes)

                if termnodes and ids:
                    patch = make_term_from_paragraph_node(termnodes, ids)
                    node['ids'] = patch['ids']
                    node['names'] = patch['names']
                    processed = True

            # update leaves with processed nodes
            if processed:
                for child in patch.children:
                    child.parent = node
                node.children = patch.children
                node['translated'] = True

        # phase2: translation
        for node, msg in extract_messages(self.document):
            if node.get('translated', False):
                continue

            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg:  # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' '*3)

            patch = new_document(source, settings)
            CustomLocaleReporter(node.source, node.line).set_reporter(patch)
            parser.parse(msgstr, patch)
            try:
                patch = patch[0]
            except IndexError:  # empty node
                pass
            # XXX doctest and other block markup
            if not isinstance(
                    patch,
                    (nodes.paragraph,) + LITERAL_TYPE_NODES + IMAGE_TYPE_NODES):
                continue  # skip for now

            # auto-numbered foot note reference should use original 'ids'.
            def is_autonumber_footnote_ref(node):
                return isinstance(node, nodes.footnote_reference) and \
                    node.get('auto') == 1

            def list_replace_or_append(lst, old, new):
                if old in lst:
                    lst[lst.index(old)] = new
                else:
                    lst.append(new)
            old_foot_refs = node.traverse(is_autonumber_footnote_ref)
            new_foot_refs = patch.traverse(is_autonumber_footnote_ref)
            if len(old_foot_refs) != len(new_foot_refs):
                env.warn_node('inconsistent footnote references in '
                              'translated message', node)
            old_foot_namerefs = {}
            for r in old_foot_refs:
                old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
            for new in new_foot_refs:
                refname = new.get('refname')
                refs = old_foot_namerefs.get(refname, [])
                if not refs:
                    continue

                old = refs.pop(0)
                new['ids'] = old['ids']
                for id in new['ids']:
                    self.document.ids[id] = new
                list_replace_or_append(
                    self.document.autofootnote_refs, old, new)
                if refname:
                    list_replace_or_append(
                        self.document.footnote_refs.setdefault(refname, []),
                        old, new)
                    list_replace_or_append(
                        self.document.refnames.setdefault(refname, []),
                        old, new)

            # reference should use new (translated) 'refname'.
            # * reference target ".. _Python: ..." is not translatable.
            # * use translated refname for section refname.
            # * inline reference "`Python <...>`_" has no 'refname'.
            def is_refnamed_ref(node):
                return isinstance(node, nodes.reference) and  \
                    'refname' in node
            old_refs = node.traverse(is_refnamed_ref)
            new_refs = patch.traverse(is_refnamed_ref)
            if len(old_refs) != len(new_refs):
                env.warn_node('inconsistent references in '
                              'translated message', node)
            old_ref_names = [r['refname'] for r in old_refs]
            new_ref_names = [r['refname'] for r in new_refs]
            orphans = list(set(old_ref_names) - set(new_ref_names))
            for new in new_refs:
                if not self.document.has_name(new['refname']):
                    # Maybe refname is translated but target is not translated.
                    # Note: multiple translated refnames break link ordering.
                    if orphans:
                        new['refname'] = orphans.pop(0)
                    else:
                        # orphan refnames is already empty!
                        # reference number is same in new_refs and old_refs.
                        pass

                self.document.note_refname(new)

            # refnamed footnote and citation should use original 'ids'.
            def is_refnamed_footnote_ref(node):
                footnote_ref_classes = (nodes.footnote_reference,
                                        nodes.citation_reference)
                return isinstance(node, footnote_ref_classes) and \
                    'refname' in node
            old_refs = node.traverse(is_refnamed_footnote_ref)
            new_refs = patch.traverse(is_refnamed_footnote_ref)
            refname_ids_map = {}
            if len(old_refs) != len(new_refs):
                env.warn_node('inconsistent references in '
                              'translated message', node)
            for old in old_refs:
                refname_ids_map[old["refname"]] = old["ids"]
            for new in new_refs:
                refname = new["refname"]
                if refname in refname_ids_map:
                    new["ids"] = refname_ids_map[refname]

            # Original pending_xref['reftarget'] contain not-translated
            # target name, new pending_xref must use original one.
            # This code restricts to change ref-targets in the translation.
            old_refs = node.traverse(addnodes.pending_xref)
            new_refs = patch.traverse(addnodes.pending_xref)
            xref_reftarget_map = {}
            if len(old_refs) != len(new_refs):
                env.warn_node('inconsistent term references in '
                              'translated message', node)

            def get_ref_key(node):
                case = node["refdomain"], node["reftype"]
                if case == ('std', 'term'):
                    return None
                else:
                    return (
                        node["refdomain"],
                        node["reftype"],
                        node['reftarget'],)

            for old in old_refs:
                key = get_ref_key(old)
                if key:
                    xref_reftarget_map[key] = old.attributes
            for new in new_refs:
                key = get_ref_key(new)
                # Copy attributes to keep original node behavior. Especially
                # copying 'reftarget', 'py:module', 'py:class' are needed.
                for k, v in xref_reftarget_map.get(key, {}).items():
                    # Note: This implementation overwrite all attributes.
                    # if some attributes `k` should not be overwritten,
                    # you should provide exclude list as:
                    # `if k not in EXCLUDE_LIST: new[k] = v`
                    new[k] = v

            # update leaves
            for child in patch.children:
                child.parent = node
            node.children = patch.children

            # for highlighting that expects .rawsource and .astext() are same.
            if isinstance(node, LITERAL_TYPE_NODES):
                node.rawsource = node.astext()

            if isinstance(node, IMAGE_TYPE_NODES):
                node.update_all_atts(patch)

            node['translated'] = True

        if 'index' in env.config.gettext_additional_targets:
            # Extract and translate messages for index entries.
            for node, entries in traverse_translatable_index(self.document):
                new_entries = []
                for type, msg, tid, main in entries:
                    msg_parts = split_index_msg(type, msg)
                    msgstr_parts = []
                    for part in msg_parts:
                        msgstr = catalog.gettext(part)
                        if not msgstr:
                            msgstr = part
                        msgstr_parts.append(msgstr)

                    new_entries.append((type, ';'.join(msgstr_parts), tid, main))

                node['raw_entries'] = entries
                node['entries'] = new_entries
Beispiel #21
0
    def apply(self):
        # type: () -> None
        settings, source = self.document.settings, self.document['source']
        # XXX check if this is reliable
        assert source.startswith(self.env.srcdir)
        docname = path.splitext(
            relative_path(path.join(self.env.srcdir, 'dummy'), source))[0]
        textdomain = find_catalog(docname,
                                  self.document.settings.gettext_compact)

        # fetch translations
        dirs = [
            path.join(self.env.srcdir, directory)
            for directory in self.config.locale_dirs
        ]
        catalog, has_catalog = init_locale(dirs, self.config.language,
                                           textdomain)
        if not has_catalog:
            return

        # phase1: replace reference ids with translated names
        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg or not msgstr.strip():
                # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            patch = publish_msgstr(self.app, msgstr, source, node.line,
                                   self.config, settings)
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue  # skip for now

            processed = False  # skip flag

            # update title(section) target name-id mapping
            if isinstance(node, nodes.title):
                section_node = node.parent
                new_name = nodes.fully_normalize_name(patch.astext())
                old_name = nodes.fully_normalize_name(node.astext())

                if old_name != new_name:
                    # if name would be changed, replace node names and
                    # document nameids mapping with new name.
                    names = section_node.setdefault('names', [])
                    names.append(new_name)
                    # Original section name (reference target name) should be kept to refer
                    # from other nodes which is still not translated or uses explicit target
                    # name like "`text to display <explicit target name_>`_"..
                    # So, `old_name` is still exist in `names`.

                    _id = self.document.nameids.get(old_name, None)
                    explicit = self.document.nametypes.get(old_name, None)

                    # * if explicit: _id is label. title node need another id.
                    # * if not explicit:
                    #
                    #   * if _id is None:
                    #
                    #     _id is None means:
                    #
                    #     1. _id was not provided yet.
                    #
                    #     2. _id was duplicated.
                    #
                    #        old_name entry still exists in nameids and
                    #        nametypes for another duplicated entry.
                    #
                    #   * if _id is provided: bellow process
                    if _id:
                        if not explicit:
                            # _id was not duplicated.
                            # remove old_name entry from document ids database
                            # to reuse original _id.
                            self.document.nameids.pop(old_name, None)
                            self.document.nametypes.pop(old_name, None)
                            self.document.ids.pop(_id, None)

                        # re-entry with new named section node.
                        #
                        # Note: msgnode that is a second parameter of the
                        # `note_implicit_target` is not necessary here because
                        # section_node has been noted previously on rst parsing by
                        # `docutils.parsers.rst.states.RSTState.new_subsection()`
                        # and already has `system_message` if needed.
                        self.document.note_implicit_target(section_node)

                    # replace target's refname to new target name
                    def is_named_target(node):
                        # type: (nodes.Node) -> bool
                        return isinstance(node, nodes.target) and  \
                            node.get('refname') == old_name

                    for old_target in self.document.traverse(is_named_target):
                        old_target['refname'] = new_name

                    processed = True

            # glossary terms update refid
            if isinstance(node, nodes.term):
                gloss_entries = self.env.temp_data.setdefault(
                    'gloss_entries', set())
                for _id in node['names']:
                    if _id in gloss_entries:
                        gloss_entries.remove(_id)

                    parts = split_term_classifiers(msgstr)
                    patch = publish_msgstr(self.app, parts[0], source,
                                           node.line, self.config, settings)
                    patch = make_glossary_term(self.env, patch, parts[1],
                                               source, node.line, _id)
                    node['ids'] = patch['ids']
                    node['names'] = patch['names']
                    processed = True

            # update leaves with processed nodes
            if processed:
                for child in patch.children:
                    child.parent = node
                node.children = patch.children
                node['translated'] = True

        # phase2: translation
        for node, msg in extract_messages(self.document):
            if node.get('translated', False):
                continue

            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg:  # as-of-yet untranslated
                continue

            # update translatable nodes
            if isinstance(node, addnodes.translatable):
                node.apply_translated_message(msg, msgstr)
                continue

            # update meta nodes
            if is_pending_meta(node):
                node.details['nodes'][0]['content'] = msgstr
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            patch = publish_msgstr(self.app, msgstr, source, node.line,
                                   self.config, settings)
            # XXX doctest and other block markup
            if not isinstance(
                    patch,
                (nodes.paragraph, ) + LITERAL_TYPE_NODES + IMAGE_TYPE_NODES):
                continue  # skip for now

            # auto-numbered foot note reference should use original 'ids'.
            def is_autonumber_footnote_ref(node):
                # type: (nodes.Node) -> bool
                return isinstance(node, nodes.footnote_reference) and \
                    node.get('auto') == 1

            def list_replace_or_append(lst, old, new):
                # type: (List, Any, Any) -> None
                if old in lst:
                    lst[lst.index(old)] = new
                else:
                    lst.append(new)

            old_foot_refs = node.traverse(is_autonumber_footnote_ref)
            new_foot_refs = patch.traverse(is_autonumber_footnote_ref)
            if len(old_foot_refs) != len(new_foot_refs):
                logger.warning(
                    'inconsistent footnote references in translated message',
                    location=node)
            old_foot_namerefs = {
            }  # type: Dict[unicode, List[nodes.footnote_reference]]
            for r in old_foot_refs:
                old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
            for new in new_foot_refs:
                refname = new.get('refname')
                refs = old_foot_namerefs.get(refname, [])
                if not refs:
                    continue

                old = refs.pop(0)
                new['ids'] = old['ids']
                for id in new['ids']:
                    self.document.ids[id] = new
                list_replace_or_append(self.document.autofootnote_refs, old,
                                       new)
                if refname:
                    list_replace_or_append(
                        self.document.footnote_refs.setdefault(refname, []),
                        old, new)
                    list_replace_or_append(
                        self.document.refnames.setdefault(refname, []), old,
                        new)

            # reference should use new (translated) 'refname'.
            # * reference target ".. _Python: ..." is not translatable.
            # * use translated refname for section refname.
            # * inline reference "`Python <...>`_" has no 'refname'.
            def is_refnamed_ref(node):
                # type: (nodes.Node) -> bool
                return isinstance(node, nodes.reference) and  \
                    'refname' in node

            old_refs = node.traverse(is_refnamed_ref)
            new_refs = patch.traverse(is_refnamed_ref)
            if len(old_refs) != len(new_refs):
                logger.warning('inconsistent references in translated message',
                               location=node)
            old_ref_names = [r['refname'] for r in old_refs]
            new_ref_names = [r['refname'] for r in new_refs]
            orphans = list(set(old_ref_names) - set(new_ref_names))
            for new in new_refs:
                if not self.document.has_name(new['refname']):
                    # Maybe refname is translated but target is not translated.
                    # Note: multiple translated refnames break link ordering.
                    if orphans:
                        new['refname'] = orphans.pop(0)
                    else:
                        # orphan refnames is already empty!
                        # reference number is same in new_refs and old_refs.
                        pass

                self.document.note_refname(new)

            # refnamed footnote and citation should use original 'ids'.
            def is_refnamed_footnote_ref(node):
                # type: (nodes.Node) -> bool
                footnote_ref_classes = (nodes.footnote_reference,
                                        nodes.citation_reference)
                return isinstance(node, footnote_ref_classes) and \
                    'refname' in node

            old_refs = node.traverse(is_refnamed_footnote_ref)
            new_refs = patch.traverse(is_refnamed_footnote_ref)
            refname_ids_map = {}
            if len(old_refs) != len(new_refs):
                logger.warning('inconsistent references in translated message',
                               location=node)
            for old in old_refs:
                refname_ids_map[old["refname"]] = old["ids"]
            for new in new_refs:
                refname = new["refname"]
                if refname in refname_ids_map:
                    new["ids"] = refname_ids_map[refname]

            # Original pending_xref['reftarget'] contain not-translated
            # target name, new pending_xref must use original one.
            # This code restricts to change ref-targets in the translation.
            old_refs = node.traverse(addnodes.pending_xref)
            new_refs = patch.traverse(addnodes.pending_xref)
            xref_reftarget_map = {}
            if len(old_refs) != len(new_refs):
                logger.warning(
                    'inconsistent term references in translated message',
                    location=node)

            def get_ref_key(node):
                # type: (nodes.Node) -> Tuple[unicode, unicode, unicode]
                case = node["refdomain"], node["reftype"]
                if case == ('std', 'term'):
                    return None
                else:
                    return (
                        node["refdomain"],
                        node["reftype"],
                        node['reftarget'],
                    )
Beispiel #22
0
    def write_doc(self, docname, doctree):
        catalog = self.catalogs[find_catalog(docname,
                                             self.config.gettext_compact)]

        for node, msg in extract_messages(doctree):
            catalog.add(msg, node)