def po_align():
    """Command-line entry point: write an aligned copy of a PO file.

    The PO file named on the command line is loaded, ``set_all_index()`` is
    run on it, and its entries are emitted ordered by the values held in
    each entry's ``index`` attribute (an entry is emitted once per index
    value it carries).  The result is written to ``<po>.aligned`` through
    ``output_raw``.
    """
    prog = "po_align"
    description = """\
{0}: align PO file contents according to the original file location Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "po", help="Input PO file name. Output PO file suffix: .aligned"
    )
    options = parser.parse_args()

    source = poutils.PotData()
    with open(options.po, "r") as infile:
        source.read_po(file=infile)
    source.set_all_index()

    # One (index value, entry position) pair per index value of each entry,
    # sorted so that entries come out in index order.
    order = sorted(
        (idx, pos) for pos, entry in enumerate(source) for idx in entry.index
    )

    aligned = poutils.PotData()
    for _, pos in order:
        # Shallow-copy so a repeated entry is a distinct object in the output.
        aligned.append(copy.copy(source[pos]))
    aligned.set_all_syncid()

    with open(options.po + ".aligned", "w") as outfile:
        # Never use msguniq here
        aligned.output_raw(file=outfile)
def po_update():
    """Command-line entry point: apply ``update_msgstr`` to a PO file in place.

    The PO file named on the command line is loaded, ``update_msgstr()`` is
    run on it, and the result is written back to the same path.  With
    ``-k``/``--keep`` the input file is first moved to ``<po>.orig``.
    """
    prog = "po_update"
    description = """\
{0}: Update msgstr with new_msgid if msgstr == previous_msgid Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "-k", "--keep", action="store_true", help="keep original file as *.orig"
    )
    parser.add_argument("po", help="PO file")
    options = parser.parse_args()
    po_path = options.po

    data = poutils.PotData()
    with open(po_path, "r") as infile:
        data.read_po(file=infile)
    data.update_msgstr()

    if options.keep:
        # Set the untouched input aside before the path is rewritten below.
        shutil.move(po_path, po_path + ".orig")
    with open(po_path, "w") as outfile:
        data.output_po(file=outfile)
def po_previous():
    """Command-line entry point: apply ``previous_msgid`` to a PO file in place.

    The PO file named on the command line is loaded, ``previous_msgid()`` is
    run on it, and the result is written back to the same path.  With
    ``-k``/``--keep`` the input file is first moved to ``<po>.orig``.
    """
    prog = "po_previous"
    description = """\
{0}: Recover previous msgid (from wdiff-format) Version: {1}

This revert the po file converted by the po_wdiff to the original state.

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog='See "po_wdiff -h".',
    )
    parser.add_argument(
        "-k", "--keep", action="store_true", help="keep original file as *.orig"
    )
    parser.add_argument("po", help="PO file")
    options = parser.parse_args()
    po_path = options.po

    data = poutils.PotData()
    with open(po_path, "r") as infile:
        data.read_po(file=infile)
    data.previous_msgid()

    if options.keep:
        # Set the untouched input aside before the path is rewritten below.
        shutil.move(po_path, po_path + ".orig")
    with open(po_path, "w") as outfile:
        data.output_po(file=outfile)
def po_wdiff():
    """Command-line entry point: apply ``wdiff_msgid`` to a PO file in place.

    The PO file named on the command line is loaded, ``wdiff_msgid()`` is
    run on it, and the result is written back to the same path.  With
    ``-k``/``--keep`` the input file is first moved to ``<po>.orig``.
    """
    prog = "po_wdiff"
    description = """\
{0}: Add wdiff data to previous Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    # The epilog is emitted verbatim (it is not passed through str.format).
    epilog = """\
If you enables "--previous" option for "msgmerge", and upstream text changes,
corresponding entry in the updated po file looks like:

---
#, fuzzy
#| msgid "previous english text"
msgid "new english text"
msgstr "old translated text"
---

Sometimes, it's not easy to see what is the change. This convert the
"#| msgid" line into wdiff.

You can revert this conversion using the "po_previous" command.
"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=epilog,
    )
    parser.add_argument(
        "-k", "--keep", action="store_true", help="keep original file as *.orig"
    )
    parser.add_argument("po", help="PO file")
    options = parser.parse_args()
    po_path = options.po

    data = poutils.PotData()
    with open(po_path, "r") as infile:
        data.read_po(file=infile)
    data.wdiff_msgid()

    if options.keep:
        # Set the untouched input aside before the path is rewritten below.
        shutil.move(po_path, po_path + ".orig")
    with open(po_path, "w") as outfile:
        data.output_po(file=outfile)
def po_check():
    """Command-line entry point: run ``check_xml`` on a PO file.

    The PO file named on the command line is loaded, ``check_xml`` is called
    with the ``--force_check`` and ``--itstool`` switches, and the result is
    written to ``<po>.checked``; ``--raw`` is forwarded to ``output_po``.
    """
    prog = "po_check"
    description = """\
{0}: check matching between msgid and msgstr in a PO file Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "-f",
        "--force_check",
        action="store_true",
        help="force to check msgstr even for the fuzzy msgstr",
    )
    parser.add_argument(
        "-i",
        "--itstool",
        action="store_true",
        help="filter for itstool generated PO file",
    )
    parser.add_argument(
        "-r", "--raw", action="store_true", help="raw output without msguniq"
    )
    parser.add_argument(
        "po", help="Input PO file name. Output PO file suffix: .checked"
    )
    options = parser.parse_args()

    data = poutils.PotData()
    with open(options.po, "r") as infile:
        data.read_po(file=infile)
    data.check_xml(force_check=options.force_check, itstool=options.itstool)

    with open(options.po + ".checked", "w") as outfile:
        data.output_po(file=outfile, raw=options.raw)
def po_clean():
    """Command-line entry point: run ``clean_msgstr`` on a PO file.

    The PO file named on the command line is loaded, ``clean_msgstr`` is
    called with fixed ``<screen>`` / ``^https?://`` patterns plus the
    ``--keep_fuzzy`` switch, and the result is written to ``<po>.cleaned``;
    ``--raw`` is forwarded to ``output_po``.
    """
    prog = "po_clean"
    description = """\
{0}: make a PO file clean by removing identical ones as msgid Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "-k", "--keep_fuzzy", action="store_true", help="keep all fuzzy markers"
    )
    parser.add_argument(
        "-r", "--raw", action="store_true", help="raw output without uniq"
    )
    parser.add_argument(
        "po", help="Input PO file name. Output PO file suffix: .cleaned"
    )
    options = parser.parse_args()

    data = poutils.PotData()
    with open(options.po, "r") as infile:
        data.read_po(file=infile)
    data.clean_msgstr(
        pattern_extracted=r"<screen>",
        pattern_msgid=r"^https?://",
        keep_fuzzy=options.keep_fuzzy,
    )

    with open(options.po + ".cleaned", "w") as outfile:
        data.output_po(file=outfile, raw=options.raw)
def po_rm_fuzzy():
    """Command-line entry point: run ``rm_fuzzy_all`` on a PO file.

    The PO file named on the command line is loaded, ``rm_fuzzy_all()`` is
    run on it, and the result is written to ``<po>.fuzzy_removed``.
    """
    prog = "po_rm_fuzzy"
    description = """\
{0}: remove fuzzy flag from a PO file Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "po", help="Input PO file name. Output PO file suffix: .fuzzy_removed"
    )
    options = parser.parse_args()

    data = poutils.PotData()
    with open(options.po, "r") as infile:
        data.read_po(file=infile)
    data.rm_fuzzy_all()

    with open(options.po + ".fuzzy_removed", "w") as outfile:
        data.output_po(file=outfile)
def po_combine():
    """Command-line entry point: combine a master POT and a translated POT.

    Both POT files named on the command line are loaded and normalized, the
    translated one is merged into the master with ``combine_pots``, the
    result is passed through ``clean_msgstr`` with fixed ``<screen>`` /
    ``^https?://`` patterns, and it is written to the output path.
    ``--verbose`` is forwarded only to the master ``read_po`` call and
    ``--aligned`` to ``output_po``.
    """
    prog = "po_combine"
    description = """\
{0}: make a PO file by combining the master and translated POT Version: {1}

This po_combine is a generic helper program to facilitate the reverse
translation workflow to create PO data from the master data and the
translated data.

{2}
""".format(prog, poutils.version, poutils.copyright)
    # The only replacement field in the epilog is the program name in the
    # closing "See ...(1) manpage" line.
    epilog = """\
## The normal workflow

Normal translation workflow using the gettext-like infrastructure is:

  master data --+--> POT --[translation]--> PO --+
                +--------------------------------+--> translated data

The POT data is automatically generated without human intervention from the
master data. Let's call this functionality as the POT extractor. E.g.:

* xgettext (original tool by the gettext infrastructure covering strings in
  programs)
* po4a-gettextize without -l (from po4a covering SGML, DocBookXML, ...)
* itstool with -j (from itstool covering XML)
* sphinx.builders.gettext (MessageCatalogBuilder class in Sphinx covering
  RST)

The human translator adds the translation data to the POT data to make the PO
data. The PO data can be automatically updated by the updated POT data using
msgmerge when the master data is updated.

The translated data can be generated by the updated PO data and the master
data. (Sometimes the PO data may need to be converted to the MO format by
msgfmt.)

* gettext() (embedded function by the gettext infrastructure, alias is _())
* po4a-translate (extension tool po4a generating l10n SGML, DocBookXML, ...)
* itstool with -m (extension tool itstool generating l10n XML)
* ??? (Sphinx may be converting directly to the final format data such html
  for each l10n targets from the combination of the master data and the PO
  data without generating l10n RST source.)

This workflow helps to keep up translated output in sync with the master
output.

## The reverse workflow

When you decide to change data format, migration of the PO data is a
non-trivial task even if there is a good conversion tool from the data format
to the new ones.

We need a reverse workflow to generate the PO data from the master data and
the corresponding translated data for any gettext derivative infrastructure.

This po_combine provides a generic reverse workflow as follows:

  master data ------> POT --> [po_align] --+
                                           +--[po_combine] --> PO
  translated data --> POT --> [po_align] --+

* the POT extractor of each tool infrastructure creates the master POT data
  by placing the converted master data in the normal master data position.
* the POT extractor of each tool infrastructure creates the translated POT
  data by placing the converted translated data in the normal master data
  position.
* Manually edit the converted master POT data and translated POT data to
  align and match exactly.
* The optional po_align may help debug POT alignment issues.
* The PO data generator po_combine creates the converted PO data from the
  master POT data and translated POT data.

When the master and translated data doesn't use PO system and you want to
migrate to PO based source structure, po_combine is a great help.

I find the requirement to align and match the master data and the translated
data in their original format is more trouble than doing it on the POT data.
This is very true when the translated data is based on older master data with
many mussing paragraphs. For such cases, I see some advantage of po_combine
over native PO generation mechanism such as po4a-gettextize with -l option
for po4a.

You can add some extracted markers such as untranslated strings (numbers,
embedded XML tags, ...) to the comment section of POT file before manually
edit them for alignment. This kind of marker strings and the original POT
markers such as "#. type: Content of: <book><chapter><title>" should make the
align and match task easier with this po_combine workflow.
(po_align works for this.)

When you are migrating from an old data format to a new data format with some
conversion program, po_combine can help you to create updated PO file
corresponding to the new translated data with its generic workflow. The
alignment in the old data format is an optional task if you take this path.
But the better alignment in the old data format reduces the manual alignment
work on the new data format.

For the better alignment in the old format, disabling features such as ones
provided by the add_.../ files or any other ways to insert translator credits
or similar in the old source ensures better matched old source data.

Different strings (msgid) in master may be translated into a same string
(msgstr) in translation. This often happens when capitalization or any
trivial typographical differences in master are merged into a same translated
string in translation. This causes problem for po_combine to function well.
If the number of extracted strings for the translation is less than the
original for seemingly perfectly aligned data, this is probably the reason.

Also, if the translation misses some tags such as <_:footnote-1/>, then
alignment becomes broken and very hard to identify. For this, use a command
to check the consistency.

  po_check -i <LANG>.po

Use po_align to ensure easier matching (for po4a) and smooth operation of
po_combine. It is easier to debug source issues with po_align + po_combine.

When you have perfectly aligned data, the use of the native PO generation
mechanism such as po4a-gettextize with -l option for po4a may have advantage
over the use of po_combine. (Also, for poxml, split2po facilitates this
functionality.) You may need to clean the resulting PO using po_clean and
po_rm_fuzzy for this native approach.

TIP: pandoc is a nice document data format conversion tool.

See {}(1) manpage for more.
""".format(prog)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=epilog,
    )
    parser.add_argument(
        "-a",
        "--aligned",
        action="store_true",
        help="generate aligned but duplicated content for debug",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="verbose output"
    )
    parser.add_argument(
        "master_pot", help="Input POT file from the English source"
    )
    parser.add_argument(
        "translated_pot", help="Input POT file from the translated source"
    )
    parser.add_argument("output", help="Output PO file")
    options = parser.parse_args()

    combined = poutils.PotData()
    translated = poutils.PotData()
    with open(options.master_pot, "r") as master_file:
        combined.read_po(file=master_file, verbose=options.verbose)
    with open(options.translated_pot, "r") as translated_file:
        translated.read_po(file=translated_file)

    combined.normalize()
    translated.normalize()
    # Merge the translated POT into the master data, then clean the msgstr
    # values before writing.
    combined.combine_pots(translated)
    combined.clean_msgstr(
        pattern_extracted=r"<screen>", pattern_msgid=r"^https?://"
    )

    with open(options.output, "w") as output_file:
        combined.output_po(file=output_file, aligned=options.aligned)