def test_mark_not_found_as_obsolete(tmp_file): original_md_file_content = ('Some string in the markdown\n\n' 'Another string\n\n') new_md_file_content = 'A new string\n' with tempfile.NamedTemporaryFile(suffix='.po') as po_file: with tmp_file(original_md_file_content, '.md') as original_md_filepath: md2po = Md2Po(original_md_filepath) pofile = md2po.extract(po_filepath=po_file.name, save=True) assert pofile == f'''# msgid "" msgstr "" #: {original_md_filepath}:block 1 (paragraph) msgid "Some string in the markdown" msgstr "" #: {original_md_filepath}:block 2 (paragraph) msgid "Another string" msgstr "" ''' with tmp_file(new_md_file_content, '.md') as new_md_filepath: md2po = Md2Po( new_md_filepath, mark_not_found_as_obsolete=True, ) pofile = md2po.extract(po_filepath=po_file.name) assert pofile == f'''#
def test_command_event(abort_event):
    """Check that a ``command`` event handler can interrupt the extraction.

    The handler raises ``ValueError`` for an unknown command only when
    ``abort_event`` is truthy; in that case the error must propagate out of
    ``extract()``, otherwise the extraction must finish without raising.
    """
    failure_message = 'unhandled command for testing'

    def error_when_unknown_command_event(
        self, command, comment, original_command,
    ):
        # 'normalize_mdpo_command' is called only to imitate what a real
        # handler would do, it is not what this test is checking
        if normalize_mdpo_command(command) is None and abort_event:
            raise ValueError(failure_message)

    md2po = Md2Po(
        '<!-- mdpo-unknown-command -->',
        events={'command': error_when_unknown_command_event},
    )

    if not abort_event:
        md2po.extract()
        return

    with pytest.raises(ValueError, match=failure_message):
        md2po.extract()
def test_md2po_save_without_po_filepath():
    """``extract(save=True)`` must raise when no ``po_filepath`` is given."""
    md2po = Md2Po('foo\n\nbar\n\nbaz\n')

    with pytest.raises(
        ValueError,
        match=(
            "The argument 'save' does not make sense without passing the"
            " argument 'po_filepath'."
        ),
    ):
        md2po.extract(save=True)
def test_content_extractor(): markdown_content = '''# Header 1 Some awesome text ```fakelanguage code block ``` ''' md2po = Md2Po(markdown_content) assert str(md2po.extract()) == '''#
def test_enter_block_event(abort_event):
    """Check the effect of the ``enter_block`` handler's return value.

    The handler returns ``not abort_event``. After the extraction the
    top-level block counter is expected to be 1 when the handler returned a
    truthy value and 0 when it did not.
    """
    def on_enter_block(*_):
        return not abort_event

    md2po = Md2Po('Hello\n', events={'enter_block': on_enter_block})
    md2po.extract()

    expected_block_number = 0 if abort_event else 1
    assert md2po._current_top_level_block_number == expected_block_number
def test_text_event(abort_event):
    """Check the effect of the ``text`` handler's return value.

    Note that here the handler returns ``abort_event`` itself (not its
    negation), and ``include_codeblocks`` is expected to end up being the
    opposite boolean.
    """
    def on_text(*_):
        return abort_event

    md2po = Md2Po(
        '<!-- mdpo-disable-codeblocks -->\n',
        events={'text': on_text},
        include_codeblocks=True,
    )
    md2po.extract()

    # when the text handling is not executed, the 'disable-codeblocks'
    # command is never parsed
    assert md2po.include_codeblocks is not abort_event
def test_disabled_entries():
    """Messages skipped by disable commands end up in ``disabled_entries``.

    Checks both the number of disabled entries and that every expected
    msgid is present among them.
    """
    content = '''This must be included.

<!-- mdpo-disable -->

This must be ignored.

<!-- mdpo-enable-next-line -->
This must be included also.

This must be ignored also.

<!-- mdpo-enable-next-line -->
# This header must be included

Other line that must be ignored.

<!-- mdpo-enable -->

The last line also must be included.
'''

    md2po = Md2Po(content)
    md2po.extract()

    expected_msgids = [
        'This must be ignored.',
        'This must be ignored also.',
        'Other line that must be ignored.',
    ]

    assert len(md2po.disabled_entries) == len(expected_msgids)

    for expected_msgid in expected_msgids:
        is_disabled = any(
            entry.msgid == expected_msgid
            for entry in md2po.disabled_entries
        )
        assert is_disabled, (
            f"'{expected_msgid}' msgid not found inside disabled_entries")
def test_enter_leave_block_event(abort_event):
    """Check ``enter_block`` details and the ``leave_block`` return value.

    The ``enter_block`` handler writes the ``is_task`` detail of every list
    item to stdout, which is captured and compared. The ``leave_block``
    handler returns ``not abort_event`` and the truthiness of the private
    ``_uls_deep`` counter is expected to equal ``abort_event``.
    """
    def print_is_task_list_item(self, block, details):
        if block is md4c.BlockType.LI:
            sys.stdout.write(str(details['is_task']))

    def on_leave_block(*_):
        return not abort_event

    captured = io.StringIO()
    md2po = Md2Po(
        'Hello\n\n- List item\n- [ ] Another list item\n',
        events={
            'enter_block': print_is_task_list_item,
            'leave_block': on_leave_block,
        },
    )

    with redirect_stdout(captured):
        md2po.extract()

    assert captured.getvalue() == 'FalseTrue'

    # when leave_block handling is not executed, uls deep remains at 1
    assert bool(md2po._uls_deep) is abort_event
def on_page_markdown(self, markdown, page, config, files):
    """Event executed when markdown content of a page is collected.

    Here happens most of the logic handled by the plugin:

    * For each documentation page, creates another documentation page
      for each language that will be translated (part here and part
      inside the `mkdocs-mdpo` extension, see
      :py:mod:`mkdocs_mdpo_plugin.extension` module).

    For every language yielded by ``self._translation_languages()`` the
    method updates the page PO file and the language compendium PO file
    (unless the page is excluded), writes the resulting content to the
    temporary path registered in ``self.translations.files``, builds a new
    MkDocs file/page pair for it, appends the file to ``files`` and stores
    a ``Translation`` object in ``self.translations``.

    Args:
        markdown: Markdown content of the page being processed.
        page: Page object; its ``file``, ``title`` and ``meta`` attributes
            are read, and ``file._mdpo_languages`` is set before returning.
        config: Site configuration, accessed by key (``docs_dir``,
            ``site_dir``, ``use_directory_urls``, ``theme``, ``plugins``...).
        files: Files collection to which the translated files are appended.

    Returns:
        ``None`` when the page file has a ``_mdpo_language`` attribute
        (it was created by this method for a translation), otherwise the
        result of ``remove_mdpo_commands_preserving_escaped(markdown)``.
    """
    # only process original files, pages created for translation
    # are ignored
    if hasattr(page.file, '_mdpo_language'):
        return

    # get minimum translation requirements
    min_translated = self.config['min_translated_messages']

    # check if the file is excluded to be translated
    #
    # the implementation here opts for create the file but
    # not creating the PO for translations
    #
    # other option would be to skip the languages loop entirely, but
    # this would not create the file for a language and the navigation
    # will do cross language linking, which worsens the user experience
    excluded_page = page.file.src_path in self.config['exclude']

    # navigation pages titles translations and new pages urls are
    # stored in dictionaries by language, so we can translate the
    # titles in their own PO files and then change the URLs
    # (see `on_page_context` event)
    if page.title not in self.translations.nav:
        # lang: [title, url]
        self.translations.nav[page.title] = {}

    # extract translations from original Markdown file
    md2po = Md2Po(
        markdown,
        events=build_md2po_events(self.extensions.markdown),
        mark_not_found_as_obsolete=False,
        location=False,
        ignore_msgids=self.config['ignore_msgids'],
    )
    original_po = md2po.extract()

    po2md_events = build_po2md_events(self.extensions.markdown)

    _mdpo_languages = {}  # {lang: file}

    for language in self._translation_languages():
        if not excluded_page:
            # the page has NOT been excluded from being translated:
            # update its PO files and translate its content
            lang_docs_dir = self._language_dir(
                config['docs_dir'],
                language,
            )

            compendium_filepath = os.path.join(
                lang_docs_dir,
                '_compendium.po',
            )

            # create compendium if it doesn't exist, load to memory
            if language not in self.translations.compendium_files:
                if not os.path.isfile(compendium_filepath):
                    compendium_pofile = polib.POFile()
                    compendium_pofile.save(compendium_filepath)
                self.translations.compendium_files[language] = \
                    compendium_filepath

                # initialize compendium messages cache
                self.translations.compendium_msgstrs_tr[language] = []
                self.translations.compendium_msgids[language] = []
            # always (re)load the compendium from disk for this page
            compendium_pofile = polib.pofile(compendium_filepath)

            # create pofile of the page for each language
            po_filepath = os.path.join(
                lang_docs_dir,
                f'{page.file.src_path}.po',
            )
            os.makedirs(
                os.path.abspath(os.path.dirname(po_filepath)),
                exist_ok=True,
            )
            if not os.path.isfile(po_filepath):
                po = polib.POFile()
            else:
                po = polib.pofile(po_filepath)

            # add to the page PO file the extracted entries it lacks
            for entry in original_po:
                if entry not in po:
                    po.append(entry)

            _translated_entries_msgids = []
            _translated_entries_msgstrs = []

            # translate metadata and config settings
            #
            # translate title
            translated_page_title, _title_in_pofile = (None, False)
            # translated custom description
            page_meta_description = page.meta.get('description')
            translated_page_desc, _desc_in_pofile = (None, False)

            # translate site_name and site_description
            translated_config_settings = {
                key: None for key in self.config['translate']
            }
            _config_settings_in_pofile = {
                key: False for key in self.config['translate']
            }

            if page_meta_description:
                # look for both the title and the description in one pass
                for entry in po:
                    if entry.msgid == page.title:
                        # matching title found
                        entry.obsolete = False
                        translated_page_title = entry.msgstr
                        _title_in_pofile = True
                        if entry.msgstr:
                            _translated_entries_msgstrs.append(
                                entry.msgstr,
                            )
                    if entry.msgid == page_meta_description:
                        # matching description found
                        entry.obsolete = False
                        translated_page_desc = entry.msgstr
                        _desc_in_pofile = True
                        if entry.msgstr:
                            # NOTE(review): this appends the description
                            # msgid to the msgstrs list, while the title
                            # branch appends `entry.msgstr` -- confirm
                            # whether this is intended
                            _translated_entries_msgstrs.append(
                                page_meta_description,
                            )

                # add description to PO file if not added
                if not _desc_in_pofile:
                    po.insert(
                        0,
                        polib.POEntry(
                            msgid=page_meta_description,
                            msgstr='',
                        ),
                    )

                _translated_entries_msgids.append(
                    page_meta_description,
                )
            else:
                # no description in the page metadata: only look for
                # the title
                for entry in po:
                    if entry.msgid == page.title:
                        # matching title found
                        entry.obsolete = False
                        translated_page_title = entry.msgstr
                        _title_in_pofile = True
                        if entry.msgstr:
                            _translated_entries_msgstrs.append(
                                entry.msgstr,
                            )

            # configuration settings are translated in the compendium
            for entry in compendium_pofile:
                for setting in translated_config_settings:
                    if entry.msgid == config[setting]:
                        # matching translated setting found
                        entry.obsolete = False
                        translated_config_settings[setting] = entry.msgstr
                        _config_settings_in_pofile[setting] = True
                        _translated_entries_msgids.append(config[setting])
                        if entry.msgstr:
                            _translated_entries_msgstrs.append(
                                entry.msgstr,
                            )
                        # tag the entry with the setting it translates
                        if f'mdpo-{setting}' not in entry.flags:
                            entry.flags.append(f'mdpo-{setting}')

            # add title to PO file if not added
            if not _title_in_pofile:
                po.insert(
                    0,
                    polib.POEntry(msgid=page.title, msgstr=''),
                )
            _translated_entries_msgids.append(page.title)

            # add translatable configuration settings to PO file
            for setting, _translated in _config_settings_in_pofile.items():
                if not _translated:
                    compendium_pofile.insert(
                        0,
                        polib.POEntry(
                            msgid=config[setting],
                            msgstr='',
                            flags=[f'mdpo-{setting}'],
                        ),
                    )
            compendium_pofile.save(compendium_filepath)

            # add temporarily compendium entries to language pofiles
            for entry in compendium_pofile:
                if entry not in po and entry.msgstr:
                    po.append(entry)
            po.save(po_filepath)

            # if a minimum number of translations are required to include
            # the file, compute number of untranslated messages
            if min_translated:
                n_translated, n_total = po_messages_stats(po)
                if language not in self.translations.stats:
                    self.translations.stats[language] = {
                        'total': n_total,
                        'translated': n_translated,
                    }
                else:
                    self.translations.stats[language][
                        'total'
                    ] += n_total
                    self.translations.stats[language][
                        'translated'
                    ] += n_translated

            # translate part of the markdown producing a translated file
            # content (the rest of the translations are handled by
            # extensions, see `extension` module)
            po2md = Po2Md(
                [po_filepath, compendium_filepath],
                events=po2md_events,
                wrapwidth=math.inf,  # ignore line wrapping
            )
            # pre-register description and title as translated entries
            # (with empty msgstr) before translating the content
            if page_meta_description:
                po2md.translated_entries.append(
                    polib.POEntry(
                        msgid=page_meta_description,
                        msgstr='',
                    ),
                )
            po2md.translated_entries.append(
                polib.POEntry(
                    msgid=page.title,
                    msgstr='',
                ),
            )
            content = po2md.translate(markdown)

            _disabled_msgids = [
                entry.msgid for entry in po2md.disabled_entries
            ]
            _disabled_msgids.extend(self.config['ignore_msgids'])

            for entry in po2md.translated_entries:
                _translated_entries_msgstrs.append(entry.msgstr)
                _translated_entries_msgids.append(entry.msgid)
        else:
            # mock variables if the file is excluded from being translated
            content = markdown
            translated_page_title = None
            translated_page_desc = None
            _disabled_msgids = []
            _translated_entries_msgstrs = []
            _translated_entries_msgids = []
            po, po_filepath = [], None
            translated_config_settings = {}

        # write the (translated or original) content to the temporary
        # file registered for this page and language
        temp_abs_path = self.translations.files[
            page.file.src_path
        ][language]
        temp_abs_dirpath = os.path.dirname(temp_abs_path)
        os.makedirs(temp_abs_dirpath, exist_ok=True)
        with open(temp_abs_path, 'w') as f:
            f.write(content)

        new_file = mkdocs.structure.files.File(
            temp_abs_path,
            temp_abs_dirpath,
            config['site_dir'],
            config['use_directory_urls'],
        )
        new_file.url = os.path.relpath(
            temp_abs_path,
            self.translations.tempdir.name,
        )
        # mark the file so this event ignores it (see the guard at the top)
        new_file._mdpo_language = language

        new_page_title = translated_page_title or page.title
        new_page = mkdocs.structure.pages.Page(
            new_page_title,
            new_file,
            config,
        )
        if translated_page_desc:
            new_page.meta['description'] = translated_page_desc

        # overwrite the edit uri for the translated page targeting
        # the PO file located in the repository
        #
        # NOTE(review): for excluded pages `po_filepath` is None at this
        # point, so `os.path.relpath` would raise if 'repo_url' and
        # 'edit_uri' are configured -- confirm
        if config.get('repo_url') and config.get('edit_uri'):
            new_page.edit_url = urljoin(
                config['repo_url'],
                os.path.normpath(
                    os.path.join(
                        config['edit_uri'],
                        os.path.relpath(po_filepath, config['docs_dir']),
                    ),
                ),
            )
        files.append(new_file)

        _mdpo_languages[language] = new_file

        # create translation object
        translation = Translation(
            language,
            po,
            po_filepath,
            [entry.msgid for entry in po],  # po_msgids
            _translated_entries_msgstrs,
            _translated_entries_msgids,
            _disabled_msgids,
        )
        self.translations.current = translation

        # only the first value seen for each language is stored
        if language not in self.translations.config_settings:
            self.translations.config_settings[language] = (
                translated_config_settings
            )

        if language not in self.translations.page_metas:
            self.translations.page_metas[language] = {}
        if (
            new_file.src_path not in
            self.translations.page_metas[language]
        ):
            self.translations.page_metas[
                language
            ][new_file.src_path] = new_page.meta

        # change file url
        url = removesuffix(new_page.file.url, '.md') + '.html'
        if config['use_directory_urls']:
            url = removesuffix(url, 'index.html')
        new_page.file.url = url

        # the title of the page will be 'page.title' (the original)
        # if the file is being excluded from translations using the
        # 'exclude' plugin's config setting
        self.translations.nav[page.title][language] = [
            new_page_title, new_page.file.url,
        ]

        # set languages for search when 'cross_language_search'
        # is disabled
        #
        # if it is enabled, this configuration is handled in the
        # `on_config` event
        if self.config['cross_language_search'] is False:
            if (
                config['theme'].name != 'material' and
                'search' in config['plugins']
            ):
                # Mkdocs theme languages
                lunr_languages = get_lunr_languages()
                search_langs = (
                    config['plugins']['search'].config['lang'] or []
                )
                if language in lunr_languages:
                    if language not in search_langs:
                        # set only the language to search
                        config['plugins']['search'].config['lang'] = (
                            [language]
                        )
                        logger.debug(
                            f"[mdpo] Setting ['{language}'] for"
                            " 'plugins.search.lang' option",
                        )
                elif language != 'en':
                    logger.info(
                        f"[mdpo] Language '{language}' is not supported by"
                        ' lunr.js, not setting it for'
                        " 'plugins.search.lang' option",
                    )

        # populate the translated page; 'dirty' is true only when
        # '--dirty' was passed in the command line without a clean flag
        mkdocs.commands.build._populate_page(
            new_page,
            config,
            files,
            dirty=(
                '--dirty' in sys.argv and
                '-c' not in sys.argv and '--clean' not in sys.argv
            ),
        )

        if language not in self.translations.all:
            self.translations.all[language] = []
        self.translations.all[language].append(translation)

    self.translations.current = None

    # reconfigure default language for plugins and themes after
    # translated pages are built
    if (
        config['theme'].name != 'material' and
        'search' in config['plugins'] and
        hasattr(config['plugins']['search'], 'lang')
    ):
        config['plugins']['search'].config['lang'] = [
            self.config['default_language'],
        ]

    # set languages to render in sitemap.xml
    page.file._mdpo_languages = _mdpo_languages

    return remove_mdpo_commands_preserving_escaped(markdown)
def markdown_to_pofile_to_markdown(
    langs,
    input_paths_glob,
    output_paths_schema,
    extensions=DEFAULT_MD4C_GENERIC_PARSER_EXTENSIONS,
    command_aliases={},
    location=True,
    debug=False,
    po_wrapwidth=78,
    md_wrapwidth=80,
    po_encoding=None,
    md_encoding=None,
    md2po_kwargs={},
    po2md_kwargs={},
    _check_saved_files_changed=False,
):
    """Translate a set of Markdown files using PO files.

    For every input file and language, the messages of the Markdown file
    are extracted into a PO file and then that PO file is used to write
    the translated Markdown file.

    Args:
        langs (list): List of languages used to build the output directories.
        input_paths_glob (str): Glob covering Markdown files to translate.
        output_paths_schema (str): Path schema for outputs, built using
            placeholders. There is a mandatory placeholder for languages:
            ``{lang}``; and one optional for output basename: ``{basename}``.
            For example, for the schema ``locale/{lang}``, the languages
            ``['es', 'fr']`` and a ``README.md`` as input, the next files will
            be written:

            * ``locale/es/README.po``
            * ``locale/es/README.md``
            * ``locale/fr/README.po``
            * ``locale/fr/README.md``

            Note that you can omit ``{basename}``, specifying a directory for
            each language with ``locale/{lang}`` for this example.

            Nonexistent directories and files will be created, so you don't
            have to prepare the output directories before the execution.
        extensions (list): md4c extensions used to parse markdown content,
            formatted as a list of 'pymd4c' keyword arguments. You can see all
            available at `pymd4c repository
            <https://github.com/dominickpastore/pymd4c#parser-option-flags>`_.
        command_aliases (dict): Mapping of aliases to use custom mdpo command
            names in comments. The ``mdpo-`` prefix in command names
            resolution is optional. For example, if you want to use
            ``<!-- mdpo-on -->`` instead of ``<!-- mdpo-enable -->``, you can
            pass the dictionaries ``{"mdpo-on": "mdpo-enable"}`` or
            ``{"mdpo-on": "enable"}`` to this parameter.
        location (bool): Store references of top-level blocks in which are
            found the messages in PO file ``#: reference`` comments.
        debug (bool): Add events displaying all parsed elements in the
            extraction process.
        po_wrapwidth (int): Maximum width for PO files.
        md_wrapwidth (int): Maximum width for produced Markdown contents, when
            possible.
        po_encoding (str): PO files encoding.
        md_encoding (str): Markdown files encoding.
        md2po_kwargs (dict): Additional optional arguments passed to
            ``markdown_to_pofile`` function.
        po2md_kwargs (dict): Additional optional arguments passed to
            ``pofile_to_markdown`` function.

    Returns:
        ``None`` when ``_check_saved_files_changed`` is false. Otherwise,
        ``False`` or the first value other than ``False`` found in the
        ``_saved_files_changed`` attribute of the converters used.

    Raises:
        ValueError: ``output_paths_schema`` does not contain ``{lang}`` or
            ``input_paths_glob`` is not a valid glob.
        FileNotFoundError: ``input_paths_glob`` does not match any file.
    """
    # function-scope import: used only to recognize invalid glob patterns
    import re

    if '{lang}' not in output_paths_schema:
        raise ValueError(
            "You must pass the replacer '{lang}' inside the argument"
            " 'output_paths_schema'.",
        )

    try:
        input_paths_glob_ = glob.glob(input_paths_glob)
    except re.error as err:
        # some strings like '[s-m]' will produce
        # 're.error: bad character range ... at position'
        #
        # the exception is matched by type instead of by class name
        # because that class is named 'PatternError' since Python 3.13
        # ('re.error' is kept as an alias)
        raise ValueError(
            "The argument 'input_paths_glob' must be a valid glob or file"
            ' path.',
        ) from err

    if not input_paths_glob_:
        raise FileNotFoundError(
            f'The glob \'{input_paths_glob}\' does not match any file.',
        )

    # None means "not checked", a boolean is reported only on demand
    _saved_files_changed = False if _check_saved_files_changed else None

    for filepath in input_paths_glob_:
        # these values depend on the input file only, not on the language
        input_basename = os.path.basename(filepath)
        file_basename, md_ext = os.path.splitext(input_basename)

        for lang in langs:
            format_kwargs = {'lang': lang}
            if '{basename}' in output_paths_schema:
                format_kwargs['basename'] = file_basename
            po_filepath = output_paths_schema.format(**format_kwargs)

            # the formatted schema is treated as a file path if its last
            # component contains a dot or equals the input basename,
            # otherwise as a directory
            po_basename = os.path.basename(po_filepath)
            if po_basename.count('.') or file_basename == po_basename:
                po_dirpath = os.path.dirname(po_filepath)
            else:
                po_dirpath = po_filepath

            os.makedirs(os.path.abspath(po_dirpath), exist_ok=True)
            if os.path.isdir(po_filepath):
                po_filepath = os.path.join(
                    po_filepath.rstrip(os.sep),
                    f'{input_basename}.po',
                )
            if not po_filepath.endswith('.po'):
                po_filepath += '.po'

            format_kwargs['ext'] = md_ext.lstrip('.')
            md_filepath = output_paths_schema.format(**format_kwargs)
            if os.path.isdir(md_filepath):
                md_filepath = (
                    md_filepath.rstrip(os.sep) + os.sep + input_basename
                )

            # md2po
            md2po = Md2Po(
                filepath,
                extensions=extensions,
                command_aliases=command_aliases,
                debug=debug,
                location=location,
                wrapwidth=po_wrapwidth,
                _check_saved_files_changed=_check_saved_files_changed,
                **md2po_kwargs,
            )
            md2po.extract(
                save=True,
                po_filepath=po_filepath,
                po_encoding=po_encoding,
                md_encoding=md_encoding,
            )
            # once a change has been detected the flag is not overwritten
            if _check_saved_files_changed and _saved_files_changed is False:
                _saved_files_changed = md2po._saved_files_changed

            # po2md
            po2md = Po2Md(
                [po_filepath],
                command_aliases=command_aliases,
                debug=debug,
                po_encoding=po_encoding,
                wrapwidth=md_wrapwidth,
                _check_saved_files_changed=_check_saved_files_changed,
                **po2md_kwargs,
            )
            po2md.translate(
                filepath,
                save=md_filepath,
                md_encoding=md_encoding,
            )
            if _check_saved_files_changed and _saved_files_changed is False:
                _saved_files_changed = po2md._saved_files_changed

    return _saved_files_changed
def test_ignore_msgids(): content = 'foo\n\nbar\n\nbaz\n' md2po = Md2Po(content, ignore_msgids=['foo', 'baz']) assert str(md2po.extract(content)) == '''#
def test_msgstr(): content = 'Mensaje por defecto' md2po = Md2Po(content, msgstr='Default message') assert str(md2po.extract(content)) == '''#