Ejemplo n.º 1
0
def test_mark_not_found_as_obsolete(tmp_file):
    original_md_file_content = ('Some string in the markdown\n\n'
                                'Another string\n\n')
    new_md_file_content = 'A new string\n'

    with tempfile.NamedTemporaryFile(suffix='.po') as po_file:
        with tmp_file(original_md_file_content, '.md') as original_md_filepath:
            md2po = Md2Po(original_md_filepath)
            pofile = md2po.extract(po_filepath=po_file.name, save=True)
        assert pofile == f'''#
msgid ""
msgstr ""

#: {original_md_filepath}:block 1 (paragraph)
msgid "Some string in the markdown"
msgstr ""

#: {original_md_filepath}:block 2 (paragraph)
msgid "Another string"
msgstr ""
'''

        with tmp_file(new_md_file_content, '.md') as new_md_filepath:
            md2po = Md2Po(
                new_md_filepath,
                mark_not_found_as_obsolete=True,
            )
            pofile = md2po.extract(po_filepath=po_file.name)
        assert pofile == f'''#
Ejemplo n.º 2
0
def test_command_event(abort_event):
    """Check that a custom ``command`` event handler can abort extraction.

    The handler raises ``ValueError`` for a command that
    ``normalize_mdpo_command`` resolves to ``None`` when ``abort_event`` is
    truthy. In that case the error must surface from ``extract()``;
    otherwise the extraction must finish without raising.
    """
    error_text = 'unhandled command for testing'

    def error_when_unknown_command_event(
        self,
        command,
        comment,
        original_command,
    ):
        # 'normalize_mdpo_command' is called only to mimic what a real
        # handler would do, it is not what this test verifies
        unknown_command = normalize_mdpo_command(command) is None
        if unknown_command and abort_event:
            raise ValueError(error_text)

    extractor = Md2Po(
        '<!-- mdpo-unknown-command -->',
        events={'command': error_when_unknown_command_event},
    )

    if not abort_event:
        # nothing is raised by the handler, extraction just completes
        extractor.extract()
        return

    with pytest.raises(ValueError, match=error_text):
        extractor.extract()
Ejemplo n.º 3
0
def test_md2po_save_without_po_filepath():
    """``extract(save=True)`` without ``po_filepath`` must raise ValueError."""
    extractor = Md2Po('foo\n\nbar\n\nbaz\n')

    # message expected in the raised error (used as a pattern by pytest)
    error_pattern = (
        "The argument 'save' does not make sense"
        ' without passing the'
        " argument 'po_filepath'."
    )

    with pytest.raises(ValueError, match=error_pattern):
        extractor.extract(save=True)
Ejemplo n.º 4
0
def test_content_extractor():
    markdown_content = '''# Header 1

Some awesome text

```fakelanguage
code block
```
'''

    md2po = Md2Po(markdown_content)
    assert str(md2po.extract()) == '''#
Ejemplo n.º 5
0
def test_enter_block_event(abort_event):
    """Check the effect of the ``enter_block`` event return value.

    The handler returns ``not abort_event``. After extracting a single
    paragraph, the top level block counter is expected to be ``1`` when
    ``abort_event`` is falsy and ``0`` otherwise.
    """
    def on_enter_block(*_):
        return not abort_event

    extractor = Md2Po('Hello\n', events={'enter_block': on_enter_block})
    extractor.extract()

    expected_block_number = 0 if abort_event else 1
    assert extractor._current_top_level_block_number == expected_block_number
Ejemplo n.º 6
0
def test_text_event(abort_event):
    """Check the effect of the ``text`` event return value.

    The content only holds a ``mdpo-disable-codeblocks`` command and the
    extractor starts with ``include_codeblocks=True``. The handler returns
    ``abort_event``; afterwards ``include_codeblocks`` must not be the same
    object as ``abort_event``.
    """
    def on_text(*_):
        return abort_event

    extractor = Md2Po(
        '<!-- mdpo-disable-codeblocks -->\n',
        events={'text': on_text},
        include_codeblocks=True,
    )
    extractor.extract()

    # when the default text handling is skipped, the command that disables
    # codeblocks is never parsed
    codeblocks_included = extractor.include_codeblocks
    assert codeblocks_included is not abort_event
Ejemplo n.º 7
0
def test_disabled_entries():
    """Check which messages end up in ``Md2Po.disabled_entries``.

    Messages located between ``mdpo-disable`` and ``mdpo-enable`` commands
    that are not rescued by ``mdpo-enable-next-line`` are expected to be
    stored as disabled entries, and nothing else.
    """
    content = '''This must be included.

<!-- mdpo-disable -->

This must be ignored.

<!-- mdpo-enable-next-line -->
This must be included also.

This must be ignored also.

<!-- mdpo-enable-next-line -->
# This header must be included

Other line that must be ignored.

<!-- mdpo-enable -->

The last line also must be included.
'''

    extractor = Md2Po(content)
    extractor.extract()

    expected_msgids = [
        'This must be ignored.',
        'This must be ignored also.',
        'Other line that must be ignored.',
    ]

    # same amount of entries first, then each expected message individually
    assert len(extractor.disabled_entries) == len(expected_msgids)

    for expected_msgid in expected_msgids:
        disabled_msgids = [
            entry.msgid for entry in extractor.disabled_entries
        ]
        assert expected_msgid in disabled_msgids, (
            f"'{expected_msgid}' msgid not found inside disabled_entries")
Ejemplo n.º 8
0
def test_enter_leave_block_event(abort_event):
    """Check ``enter_block`` and ``leave_block`` events working together.

    The ``enter_block`` handler writes the ``is_task`` detail of every list
    item block to stdout, which is captured and compared. The
    ``leave_block`` handler returns ``not abort_event`` and its effect is
    checked through the truthiness of the ``_uls_deep`` attribute.
    """
    def print_is_task_list_item(self, block, details):
        # only list items are reported
        if block is not md4c.BlockType.LI:
            return
        sys.stdout.write(str(details['is_task']))

    def on_leave_block(*_):
        return not abort_event

    captured = io.StringIO()
    extractor = Md2Po(
        'Hello\n\n- List item\n- [ ] Another list item\n',
        events={
            'enter_block': print_is_task_list_item,
            'leave_block': on_leave_block,
        },
    )

    with redirect_stdout(captured):
        extractor.extract()

    # first item is a regular one, second item is a task
    assert captured.getvalue() == 'FalseTrue'

    # when the default leave_block handling is skipped, the unordered lists
    # depth is not restored and stays truthy
    lists_still_open = bool(extractor._uls_deep)
    assert lists_still_open is abort_event
Ejemplo n.º 9
0
    def on_page_markdown(self, markdown, page, config, files):
        """Event executed when markdown content of a page is collected.

        Here happen most of the logic handled by the plugin:

        * For each documentation page, creates another documentation page
          for each language that will be translated (part here and part
          inside the `mkdocs-mdpo` extension, see
          :py:mod:`mkdocs_mdpo_plugin.extension` module).

        For every language returned by ``self._translation_languages()``
        the method:

        * Updates (or creates) the PO file of the page and the
          ``_compendium.po`` file of the language, unless the page is listed
          in the ``exclude`` setting of the plugin.
        * Writes the translated content to the temporary path stored in
          ``self.translations.files`` and builds a new file and page for it.
        * Stores a ``Translation`` object in ``self.translations`` and
          populates the new page.

        Args:
            markdown: Markdown content of the page, passed to the extractor
                and to the translator.
            page: Page being processed. Its ``file``, ``title`` and ``meta``
                attributes are read here.
            config: MkDocs configuration, accessed by key (``docs_dir``,
                ``site_dir``, ``use_directory_urls``, ``theme``,
                ``plugins``...).
            files: Files collection of the build, the files created for
                translations are appended to it.

        Returns:
            ``None`` if the page was created for a translation (its file has
            a ``_mdpo_language`` attribute), otherwise the result of
            ``remove_mdpo_commands_preserving_escaped(markdown)``.
        """
        # only process original files, pages created for translation
        # are ignored
        if hasattr(page.file, '_mdpo_language'):
            return

        # get minimum translation requirements
        min_translated = self.config['min_translated_messages']

        # check if the file is excluded to be translated
        #
        # the implementation here opts for create the file but
        # not creating the PO for translations
        #
        # other option would be to skip the languages loop entirely, but
        # this would not create the file for a language and the navigation
        # will do cross language linking, which worsens the user experience
        excluded_page = page.file.src_path in self.config['exclude']

        # navigation pages titles translations and new pages urls are
        # stored in dictionaries by language, so we can translate the
        # titles in their own PO files and then change the URLs
        # (see `on_page_context` event)
        if page.title not in self.translations.nav:
            # lang: [title, url]
            self.translations.nav[page.title] = {}

        # extract translations from original Markdown file
        md2po = Md2Po(
            markdown,
            events=build_md2po_events(self.extensions.markdown),
            mark_not_found_as_obsolete=False,
            location=False,
            ignore_msgids=self.config['ignore_msgids'],
        )
        original_po = md2po.extract()

        po2md_events = build_po2md_events(self.extensions.markdown)

        _mdpo_languages = {}  # {lang: file}

        for language in self._translation_languages():
            if not excluded_page:
                # the page has not been excluded from being translated, so
                # its PO file and the compendium of the language are updated
                lang_docs_dir = self._language_dir(
                    config['docs_dir'],
                    language,
                )

                compendium_filepath = os.path.join(
                    lang_docs_dir,
                    '_compendium.po',
                )

                # create compendium if doesn't exists, load to memory
                if language not in self.translations.compendium_files:
                    if not os.path.isfile(compendium_filepath):
                        compendium_pofile = polib.POFile()
                        compendium_pofile.save(compendium_filepath)
                    self.translations.compendium_files[language] = \
                        compendium_filepath

                    # initialize compendium messages cache
                    self.translations.compendium_msgstrs_tr[language] = []
                    self.translations.compendium_msgids[language] = []

                compendium_pofile = polib.pofile(compendium_filepath)

                # create pofile of the page for each language
                po_filepath = os.path.join(
                    lang_docs_dir,
                    f'{page.file.src_path}.po',
                )
                os.makedirs(
                    os.path.abspath(os.path.dirname(po_filepath)),
                    exist_ok=True,
                )
                if not os.path.isfile(po_filepath):
                    po = polib.POFile()
                else:
                    po = polib.pofile(po_filepath)

                # add to the language PO file the messages extracted from
                # the original content that are not already there
                for entry in original_po:
                    if entry not in po:
                        po.append(entry)

                _translated_entries_msgids = []
                _translated_entries_msgstrs = []

                # translate metadata and config settings
                #
                # translate title
                translated_page_title, _title_in_pofile = (None, False)
                # translated custom description
                page_meta_description = page.meta.get('description')
                translated_page_desc, _desc_in_pofile = (None, False)

                # translate site_name and site_description
                translated_config_settings = {
                    key: None for key in self.config['translate']
                }
                _config_settings_in_pofile = {
                    key: False for key in self.config['translate']
                }

                if page_meta_description:
                    # the page defines a description: look for both title
                    # and description in a single pass over the PO file
                    for entry in po:
                        if entry.msgid == page.title:
                            # matching title found
                            entry.obsolete = False
                            translated_page_title = entry.msgstr
                            _title_in_pofile = True
                            if entry.msgstr:
                                _translated_entries_msgstrs.append(
                                    entry.msgstr,
                                )

                        if entry.msgid == page_meta_description:
                            # matching description found
                            entry.obsolete = False
                            translated_page_desc = entry.msgstr
                            _desc_in_pofile = True
                            # NOTE(review): the msgid (description) is
                            # appended to the msgstrs list here, while the
                            # title branch appends 'entry.msgstr' -- confirm
                            # this is intended
                            if entry.msgstr:
                                _translated_entries_msgstrs.append(
                                    page_meta_description,
                                )

                    # add description to PO file if not added
                    if not _desc_in_pofile:
                        po.insert(
                            0,
                            polib.POEntry(
                                msgid=page_meta_description,
                                msgstr='',
                            ),
                        )

                        _translated_entries_msgids.append(
                            page_meta_description,
                        )
                else:
                    # no description: only the title is searched
                    for entry in po:
                        if entry.msgid == page.title:
                            # matching title found
                            entry.obsolete = False
                            translated_page_title = entry.msgstr
                            _title_in_pofile = True
                            if entry.msgstr:
                                _translated_entries_msgstrs.append(
                                    entry.msgstr,
                                )

                # translatable configuration settings are searched in the
                # compendium of the language, not in the PO file of the page
                for entry in compendium_pofile:
                    for setting in translated_config_settings:
                        if entry.msgid == config[setting]:
                            # matching translated setting found
                            entry.obsolete = False
                            translated_config_settings[setting] = entry.msgstr
                            _config_settings_in_pofile[setting] = True
                            _translated_entries_msgids.append(config[setting])
                            if entry.msgstr:
                                _translated_entries_msgstrs.append(
                                    entry.msgstr,
                                )
                            # mark the entry with a flag naming the setting
                            if f'mdpo-{setting}' not in entry.flags:
                                entry.flags.append(f'mdpo-{setting}')

                # add title to PO file if not added
                if not _title_in_pofile:
                    po.insert(
                        0,
                        polib.POEntry(msgid=page.title, msgstr=''),
                    )
                    _translated_entries_msgids.append(page.title)

                # add translatable configuration settings to PO file
                for setting, _translated in _config_settings_in_pofile.items():
                    if not _translated:
                        compendium_pofile.insert(
                            0,
                            polib.POEntry(
                                msgid=config[setting],
                                msgstr='',
                                flags=[f'mdpo-{setting}'],
                            ),
                        )
                compendium_pofile.save(compendium_filepath)

                # add temporarily compendium entries to language pofiles
                for entry in compendium_pofile:
                    if entry not in po and entry.msgstr:
                        po.append(entry)
                po.save(po_filepath)

                # if a minimum number of translations are required to include
                # the file, compute number of untranslated messages
                if min_translated:
                    n_translated, n_total = po_messages_stats(po)
                    if language not in self.translations.stats:
                        self.translations.stats[language] = {
                            'total': n_total,
                            'translated': n_translated,
                        }
                    else:
                        self.translations.stats[language][
                            'total'
                        ] += n_total
                        self.translations.stats[language][
                            'translated'
                        ] += n_translated

                # translate part of the markdown producing a translated file
                # content (the rest of the translations are handled by
                # extensions, see `extension` module)
                po2md = Po2Md(
                    [po_filepath, compendium_filepath],
                    events=po2md_events,
                    wrapwidth=math.inf,  # ignore line wrapping
                )
                # description and title are registered as translated entries
                # of the translator before translating the content
                if page_meta_description:
                    po2md.translated_entries.append(
                        polib.POEntry(
                            msgid=page_meta_description,
                            msgstr='',
                        ),
                    )
                po2md.translated_entries.append(
                    polib.POEntry(
                        msgid=page.title,
                        msgstr='',
                    ),
                )
                content = po2md.translate(markdown)

                # messages disabled by commands plus the ones ignored
                # through the plugin configuration
                _disabled_msgids = [
                    entry.msgid for entry in po2md.disabled_entries
                ]
                _disabled_msgids.extend(self.config['ignore_msgids'])

                for entry in po2md.translated_entries:
                    _translated_entries_msgstrs.append(entry.msgstr)
                    _translated_entries_msgids.append(entry.msgid)
            else:
                # mock variables if the file is excluded from being translated
                content = markdown
                translated_page_title = None
                translated_page_desc = None
                _disabled_msgids = []
                _translated_entries_msgstrs = []
                _translated_entries_msgids = []
                po, po_filepath = [], None
                translated_config_settings = {}

            # write the content of the page for this language into its
            # temporary file
            temp_abs_path = self.translations.files[
                page.file.src_path
            ][language]
            temp_abs_dirpath = os.path.dirname(temp_abs_path)
            os.makedirs(temp_abs_dirpath, exist_ok=True)
            with open(temp_abs_path, 'w') as f:
                f.write(content)

            new_file = mkdocs.structure.files.File(
                temp_abs_path,
                temp_abs_dirpath,
                config['site_dir'],
                config['use_directory_urls'],
            )
            # the url is relative to the temporary directory of translations
            new_file.url = os.path.relpath(
                temp_abs_path,
                self.translations.tempdir.name,
            )
            # this attribute makes the new page to be skipped at the
            # beginning of this event
            new_file._mdpo_language = language

            # fallback to the original title if there is no translation
            new_page_title = translated_page_title or page.title
            new_page = mkdocs.structure.pages.Page(
                new_page_title,
                new_file,
                config,
            )
            if translated_page_desc:
                new_page.meta['description'] = translated_page_desc

            # overwrite the edit uri for the translated page targeting
            # the PO file located in the repository
            #
            # NOTE(review): 'po_filepath' is None for excluded pages (see the
            # mocked variables above), so 'os.path.relpath' would fail if
            # 'repo_url' and 'edit_uri' are configured -- confirm if
            # excluded pages can reach this point with both settings defined
            if config.get('repo_url') and config.get('edit_uri'):
                new_page.edit_url = urljoin(
                    config['repo_url'],
                    os.path.normpath(
                        os.path.join(
                            config['edit_uri'],
                            os.path.relpath(po_filepath, config['docs_dir']),
                        ),
                    ),
                )

            files.append(new_file)
            _mdpo_languages[language] = new_file

            # create translation object
            translation = Translation(
                language,
                po,
                po_filepath,
                [entry.msgid for entry in po],  # po_msgids
                _translated_entries_msgstrs,
                _translated_entries_msgids,
                _disabled_msgids,
            )
            self.translations.current = translation
            # config settings and page metas are stored only the first time
            # that are found for a language (and file, for page metas)
            if language not in self.translations.config_settings:
                self.translations.config_settings[language] = (
                    translated_config_settings
                )
            if language not in self.translations.page_metas:
                self.translations.page_metas[language] = {}
            if (
                new_file.src_path
                not in self.translations.page_metas[language]
            ):
                self.translations.page_metas[
                    language
                ][new_file.src_path] = new_page.meta

            # change file url
            url = removesuffix(new_page.file.url, '.md') + '.html'
            if config['use_directory_urls']:
                url = removesuffix(url, 'index.html')
            new_page.file.url = url

            # the title of the page will be 'page.title' (the original)
            # if the file is being excluded from translations using the
            # 'exclude' plugin's config setting
            self.translations.nav[page.title][language] = [
                new_page_title, new_page.file.url,
            ]

            # set languages for search when 'cross_language_search'
            # is disabled
            #
            # if it is enabled, this configuration is handled in the
            # `on_config` event
            if self.config['cross_language_search'] is False:
                if (
                    config['theme'].name != 'material' and
                    'search' in config['plugins']
                ):
                    # Mkdocs theme languages
                    lunr_languages = get_lunr_languages()
                    search_langs = (
                        config['plugins']['search'].config['lang'] or []
                    )
                    if language in lunr_languages:
                        if language not in search_langs:
                            # set only the language to search
                            config['plugins']['search'].config['lang'] = (
                                [language]
                            )
                            logger.debug(
                                f"[mdpo] Setting ['{language}'] for"
                                " 'plugins.search.lang' option",
                            )
                    elif language != 'en':
                        logger.info(
                            f"[mdpo] Language '{language}' is not supported by"
                            ' lunr.js, not setting it for'
                            " 'plugins.search.lang' option",
                        )

            # populate the new page using the build internals of MkDocs,
            # 'dirty' is computed from the command line arguments
            mkdocs.commands.build._populate_page(
                new_page,
                config,
                files,
                dirty=(
                    '--dirty' in sys.argv and
                    '-c' not in sys.argv and '--clean' not in sys.argv
                ),
            )

            if language not in self.translations.all:
                self.translations.all[language] = []
            self.translations.all[language].append(translation)

        # all the languages have been processed for this page
        self.translations.current = None

        # reconfigure default language for plugins and themes after
        # translated pages are built
        #
        # NOTE(review): the option is accessed as '.config["lang"]' in the
        # rest of this method but is checked here with 'hasattr(..., "lang")'
        # -- confirm that the search plugin object exposes that attribute
        if (
            config['theme'].name != 'material'
            and 'search' in config['plugins']
            and hasattr(config['plugins']['search'], 'lang')
        ):
            config['plugins']['search'].config['lang'] = [
                self.config['default_language'],
            ]

        # set languages to render in sitemap.xml
        page.file._mdpo_languages = _mdpo_languages

        return remove_mdpo_commands_preserving_escaped(markdown)
Ejemplo n.º 10
0
def markdown_to_pofile_to_markdown(
    langs,
    input_paths_glob,
    output_paths_schema,
    extensions=DEFAULT_MD4C_GENERIC_PARSER_EXTENSIONS,
    command_aliases={},
    location=True,
    debug=False,
    po_wrapwidth=78,
    md_wrapwidth=80,
    po_encoding=None,
    md_encoding=None,
    md2po_kwargs={},
    po2md_kwargs={},
    _check_saved_files_changed=False,
):
    """Translate a set of Markdown files using PO files.

    For each input file and language, the messages of the file are extracted
    and saved into a PO file, which is then used to write the translated
    Markdown file.

    Args:
        langs (list): List of languages used to build the output directories.
        input_paths_glob (str): Glob covering Markdown files to translate.
        output_paths_schema (str): Path schema for outputs, built using
            placeholders. There is a mandatory placeholder for languages:
            ``{lang}``; and one optional for output basename: ``{basename}``.
            For example, for the schema ``locale/{lang}``, the languages
            ``['es', 'fr']`` and a ``README.md`` as input, the next files will
            be written:

            * ``locale/es/README.po``
            * ``locale/es/README.md``
            * ``locale/fr/README.po``
            * ``locale/fr/README.md``

            Note that you can omit ``{basename}``, specifying a directory for
            each language with ``locale/{lang}`` for this example.
            Unexistent directories and files will be created, so you don't
            have to prepare the output directories before the execution.
        extensions (list): md4c extensions used to parse markdown content,
            formatted as a list of 'pymd4c' keyword arguments. You can see all
            available at `pymd4c repository <https://github.com/dominickpastore
            /pymd4c#parser-option-flags>`_.
        command_aliases (dict): Mapping of aliases to use custom mdpo command
            names in comments. The ``mdpo-`` prefix in command names resolution
            is optional. For example, if you want to use ``<!-- mdpo-on -->``
            instead of ``<!-- mdpo-enable -->``, you can pass the dictionaries
            ``{"mdpo-on": "mdpo-enable"}`` or ``{"mdpo-on": "enable"}`` to this
            parameter.
        location (bool): Store references of top-level blocks in which are
            found the messages in PO file ``#: reference`` comments.
        debug (bool): Add events displaying all parsed elements in the
            extraction process.
        po_wrapwidth (int): Maximum width for PO files.
        md_wrapwidth (int): Maximum width for produced Markdown contents, when
            possible.
        po_encoding (str): PO files encoding.
        md_encoding (str): Markdown files encoding.
        md2po_kwargs (dict): Additional optional arguments passed to
            ``markdown_to_pofile`` function.
        po2md_kwargs (dict): Additional optional arguments passed to
            ``pofile_to_markdown`` function.
        _check_saved_files_changed (bool): Private option forwarded to the
            extractor and the translator, it also controls the returned
            value (see below).

    Returns:
        ``None`` if ``_check_saved_files_changed`` is falsy. Otherwise the
        value starts being ``False`` and, while it remains ``False``, is
        replaced by the ``_saved_files_changed`` attribute of the extractor
        and the translator after each file is saved.

    Raises:
        ValueError: If ``{lang}`` is not found in ``output_paths_schema`` or
            if ``input_paths_glob`` produces a regular expression error
            expanding the glob.
        FileNotFoundError: If the glob does not match any file.
    """
    # imported here because this function can't rely on 're' being available
    # at module level
    import re

    if '{lang}' not in output_paths_schema:
        raise ValueError(
            "You must pass the replacer '{lang}' inside the argument"
            " 'output_paths_schema'.", )

    try:
        input_paths_glob_ = glob.glob(input_paths_glob)
    except re.error as err:
        # some strings like '[s-m]' will produce
        # 're.error: bad character range ... at position'
        #
        # the exception is matched by class instead of by name because
        # 're.error' is an alias of 're.PatternError' since Python 3.13,
        # so the name of the class is not 'error' anymore
        raise ValueError(
            "The argument 'input_paths_glob' must be a valid glob or file"
            ' path.', ) from err

    if not input_paths_glob_:
        raise FileNotFoundError(
            f'The glob \'{input_paths_glob}\' does not match any file.', )

    _saved_files_changed = None if not _check_saved_files_changed else False

    for filepath in input_paths_glob_:
        # these values only depend on the input file, not on the language
        md_ext = os.path.splitext(filepath)[-1]
        file_basename = os.path.splitext(os.path.basename(filepath))[0]

        for lang in langs:
            format_kwargs = {'lang': lang}
            if '{basename}' in output_paths_schema:
                format_kwargs['basename'] = file_basename
            po_filepath = output_paths_schema.format(**format_kwargs)

            # if the last part of the formatted schema contains a dot or is
            # the basename of the input, it is handled as a file and its
            # parent directory is created, otherwise the whole path is
            # created as a directory
            po_basename = os.path.basename(po_filepath)
            po_dirpath = (os.path.dirname(po_filepath) if
                          (po_basename.count('.')
                           or file_basename == po_basename) else po_filepath)

            os.makedirs(os.path.abspath(po_dirpath), exist_ok=True)
            if os.path.isdir(po_filepath):
                po_filepath = os.path.join(
                    po_filepath.rstrip(os.sep),
                    f'{os.path.basename(filepath)}.po',
                )
            if not po_filepath.endswith('.po'):
                po_filepath += '.po'

            # the Markdown output path is built from the same schema, placing
            # the file inside the path if it is a directory
            format_kwargs['ext'] = md_ext.lstrip('.')
            md_filepath = output_paths_schema.format(**format_kwargs)
            if os.path.isdir(md_filepath):
                md_filepath = (md_filepath.rstrip(os.sep) + os.sep +
                               os.path.basename(filepath))

            # md2po
            md2po = Md2Po(
                filepath,
                extensions=extensions,
                command_aliases=command_aliases,
                debug=debug,
                location=location,
                wrapwidth=po_wrapwidth,
                _check_saved_files_changed=_check_saved_files_changed,
                **md2po_kwargs,
            )
            md2po.extract(
                save=True,
                po_filepath=po_filepath,
                po_encoding=po_encoding,
                md_encoding=md_encoding,
            )
            if _check_saved_files_changed and _saved_files_changed is False:
                _saved_files_changed = md2po._saved_files_changed

            # po2md
            po2md = Po2Md(
                [po_filepath],
                command_aliases=command_aliases,
                debug=debug,
                po_encoding=po_encoding,
                wrapwidth=md_wrapwidth,
                _check_saved_files_changed=_check_saved_files_changed,
                **po2md_kwargs,
            )
            po2md.translate(
                filepath,
                save=md_filepath,
                md_encoding=md_encoding,
            )
            if _check_saved_files_changed and _saved_files_changed is False:
                _saved_files_changed = po2md._saved_files_changed

    return _saved_files_changed
Ejemplo n.º 11
0
def test_ignore_msgids():
    content = 'foo\n\nbar\n\nbaz\n'
    md2po = Md2Po(content, ignore_msgids=['foo', 'baz'])
    assert str(md2po.extract(content)) == '''#
Ejemplo n.º 12
0
def test_msgstr():
    content = 'Mensaje por defecto'
    md2po = Md2Po(content, msgstr='Default message')
    assert str(md2po.extract(content)) == '''#