예제 #1
0
def po_align():
    """Command-line entry point: write a re-ordered copy of a PO file.

    Reads one PO file name from the command line, loads the file into a
    ``poutils.PotData`` and calls ``set_all_index()`` on it.  Every value
    found in an entry's ``index`` attribute yields one shallow copy of that
    entry in the output, ordered by ascending ``(index value, position in
    the input)``.  After ``set_all_syncid()`` the result is written with
    ``output_raw`` to ``<po>.aligned``.
    """
    prog = "po_align"
    banner = """\
{0}: align PO file contents according to the original file location            Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "po",
        help="Input PO file name.  Output PO file suffix: .aligned",
    )
    opts = parser.parse_args()

    source = poutils.PotData()
    with open(opts.po, "r") as src:
        source.read_po(file=src)
    source.set_all_index()

    # One (index value, position) pair per index value: an entry carrying
    # several index values is emitted several times in the output.
    order = sorted(
        (idx, pos) for pos, entry in enumerate(source) for idx in entry.index
    )

    result = poutils.PotData()
    for _, pos in order:
        result.append(copy.copy(source[pos]))
    result.set_all_syncid()

    with open(opts.po + ".aligned", "w") as dst:
        # Never use msguniq here
        result.output_raw(file=dst)
예제 #2
0
def po_update():
    """Command-line entry point: apply ``update_msgstr`` to a PO file in place.

    Loads the PO file named on the command line into a ``poutils.PotData``,
    calls its ``update_msgstr()`` method and writes the result back to the
    same path.  With ``-k``/``--keep`` the original file is moved to
    ``<po>.orig`` before the new content is written.
    """
    prog = "po_update"
    banner = """\
{0}: Update msgstr with new_msgid if msgstr == previous_msgid Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "-k", "--keep",
        action="store_true",
        default=False,
        help="keep original file as *.orig",
    )
    parser.add_argument("po", help="PO file")
    opts = parser.parse_args()
    po_path = opts.po

    catalog = poutils.PotData()
    with open(po_path, "r") as src:
        catalog.read_po(file=src)
    catalog.update_msgstr()

    # Preserve the untouched input before the path is overwritten below.
    if opts.keep:
        shutil.move(po_path, po_path + ".orig")
    with open(po_path, "w") as dst:
        catalog.output_po(file=dst)
예제 #3
0
def po_previous():
    """Command-line entry point: apply ``previous_msgid`` to a PO file in place.

    Loads the PO file named on the command line into a ``poutils.PotData``,
    calls its ``previous_msgid()`` method and writes the result back to the
    same path.  With ``-k``/``--keep`` the original file is moved to
    ``<po>.orig`` before the new content is written.
    """
    prog = "po_previous"
    banner = """\
{0}: Recover previous msgid (from wdiff-format)              Version: {1}

This revert the po file converted by the po_wdiff to the original state.

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner,
        epilog='See "po_wdiff -h".',
    )
    parser.add_argument(
        "-k", "--keep",
        action="store_true",
        default=False,
        help="keep original file as *.orig",
    )
    parser.add_argument("po", help="PO file")
    opts = parser.parse_args()
    po_path = opts.po

    catalog = poutils.PotData()
    with open(po_path, "r") as src:
        catalog.read_po(file=src)
    catalog.previous_msgid()

    # Preserve the untouched input before the path is overwritten below.
    if opts.keep:
        shutil.move(po_path, po_path + ".orig")
    with open(po_path, "w") as dst:
        catalog.output_po(file=dst)
예제 #4
0
def po_wdiff():
    """Command-line entry point: apply ``wdiff_msgid`` to a PO file in place.

    Loads the PO file named on the command line into a ``poutils.PotData``,
    calls its ``wdiff_msgid()`` method and writes the result back to the
    same path.  With ``-k``/``--keep`` the original file is moved to
    ``<po>.orig`` before the new content is written.
    """
    prog = "po_wdiff"
    banner = """\
{0}: Add wdiff data to previous                             Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    usage_notes = """\
If you enables "--previous" option for "msgmerge", and upstream text
changes, corresponding entry in the updated po file looks like:

---
#, fuzzy
#| msgid "previous english text"
msgid "new english text"
msgstr "old translated text"
---

Sometimes, it's not easy to see what is the change.  This convert
the "#| msgid" line into wdiff.  You can revert this conversion
using the "po_previous" command.
"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner,
        epilog=usage_notes,
    )
    parser.add_argument(
        "-k", "--keep",
        action="store_true",
        default=False,
        help="keep original file as *.orig",
    )
    parser.add_argument("po", help="PO file")
    opts = parser.parse_args()
    po_path = opts.po

    catalog = poutils.PotData()
    with open(po_path, "r") as src:
        catalog.read_po(file=src)
    catalog.wdiff_msgid()

    # Preserve the untouched input before the path is overwritten below.
    if opts.keep:
        shutil.move(po_path, po_path + ".orig")
    with open(po_path, "w") as dst:
        catalog.output_po(file=dst)
예제 #5
0
def po_check():
    """Command-line entry point: run ``check_xml`` on a PO file.

    Loads the PO file named on the command line into a ``poutils.PotData``
    and calls ``check_xml`` with the ``--force_check`` and ``--itstool``
    flags.  The result is written to ``<po>.checked`` via ``output_po``,
    forwarding the ``--raw`` flag; the input file itself is not modified.
    """
    prog = "po_check"
    banner = """\
{0}: check matching between msgid and msgstr in a PO file  Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "-f", "--force_check",
        action="store_true",
        default=False,
        help="force to check msgstr even for the fuzzy msgstr",
    )
    parser.add_argument(
        "-i", "--itstool",
        action="store_true",
        default=False,
        help="filter for itstool generated PO file",
    )
    parser.add_argument(
        "-r", "--raw",
        action="store_true",
        default=False,
        help="raw output without msguniq",
    )
    parser.add_argument(
        "po",
        help="Input PO file name.  Output PO file suffix: .checked",
    )
    opts = parser.parse_args()

    catalog = poutils.PotData()
    with open(opts.po, "r") as src:
        catalog.read_po(file=src)
    catalog.check_xml(force_check=opts.force_check, itstool=opts.itstool)

    with open(opts.po + ".checked", "w") as dst:
        catalog.output_po(file=dst, raw=opts.raw)
예제 #6
0
def po_clean():
    """Command-line entry point: run ``clean_msgstr`` on a PO file.

    Loads the PO file named on the command line into a ``poutils.PotData``
    and calls ``clean_msgstr`` with fixed ``pattern_extracted`` and
    ``pattern_msgid`` regular expressions plus the ``--keep_fuzzy`` flag.
    The result is written to ``<po>.cleaned`` via ``output_po``, forwarding
    the ``--raw`` flag; the input file itself is not modified.
    """
    prog = "po_clean"
    banner = """\
{0}: make a PO file clean by removing identical ones as msgid  Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "-k", "--keep_fuzzy",
        action="store_true",
        default=False,
        help="keep all fuzzy markers",
    )
    parser.add_argument(
        "-r", "--raw",
        action="store_true",
        default=False,
        help="raw output without uniq",
    )
    parser.add_argument(
        "po",
        help="Input PO file name.  Output PO file suffix: .cleaned",
    )
    opts = parser.parse_args()

    catalog = poutils.PotData()
    with open(opts.po, "r") as src:
        catalog.read_po(file=src)
    catalog.clean_msgstr(
        pattern_extracted=r"<screen>",
        pattern_msgid=r"^https?://",
        keep_fuzzy=opts.keep_fuzzy,
    )

    with open(opts.po + ".cleaned", "w") as dst:
        catalog.output_po(file=dst, raw=opts.raw)
예제 #7
0
def po_rm_fuzzy():
    """Command-line entry point: run ``rm_fuzzy_all`` on a PO file.

    Loads the PO file named on the command line into a ``poutils.PotData``,
    calls its ``rm_fuzzy_all()`` method and writes the result to
    ``<po>.fuzzy_removed`` via ``output_po``; the input file itself is not
    modified.
    """
    prog = "po_rm_fuzzy"
    banner = """\
{0}: remove fuzzy flag from a PO file                      Version: {1}

{2}
""".format(prog, poutils.version, poutils.copyright)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner,
        epilog="See {}(1) manpage for more.".format(prog),
    )
    parser.add_argument(
        "po",
        help="Input PO file name.  Output PO file suffix: .fuzzy_removed",
    )
    opts = parser.parse_args()

    catalog = poutils.PotData()
    with open(opts.po, "r") as src:
        catalog.read_po(file=src)
    catalog.rm_fuzzy_all()

    with open(opts.po + ".fuzzy_removed", "w") as dst:
        catalog.output_po(file=dst)
예제 #8
0
def po_combine():
    """Command-line entry point: combine two POT files into one PO file.

    Takes three positional arguments: the master POT, the translated POT
    and the output PO path.  Both inputs are loaded into separate
    ``poutils.PotData`` objects (``--verbose`` is forwarded only to the
    master read), each is normalized, and the translated data is merged
    into the master with ``combine_pots``.  ``clean_msgstr`` is then applied
    with fixed patterns, and the result is written with ``output_po``,
    forwarding the ``--aligned`` flag.
    """
    prog = "po_combine"
    banner = """\
{0}: make a PO file by combining the master and translated POT  Version: {1}

This po_combine is a generic helper program to facilitate the reverse
translation workflow to create PO data from the master data and the translated
data.

{2}
""".format(prog, poutils.version, poutils.copyright)
    workflow_notes = """\
## The normal workflow

Normal translation workflow using the gettext-like infrastructure is:

    master data --+--> POT --[translation]--> PO --+
                  +--------------------------------+--> translated data

The POT data is automatically generated without human intervention from the
master data.  Let's call this functionality as the POT extractor. E.g.:

* xgettext (original tool by the gettext infrastructure covering strings in
  programs)
* po4a-gettextize without -l (from po4a covering SGML, DocBookXML, ...)
* itstool with -j (from itstool covering XML)
* sphinx.builders.gettext (MessageCatalogBuilder class in Sphinx covering RST)

The human translator adds the translation data to the POT data to make the PO
data.

The PO data can be automatically updated by the updated POT data using msgmerge
when the master data is updated.

The translated data can be generated by the updated PO data and the master
data. (Sometimes the PO data may need to be converted to the MO format by
msgfmt.)

* gettext() (embedded function by the gettext infrastructure, alias is _())
* po4a-translate (extension tool po4a generating l10n SGML, DocBookXML, ...)
* itstool with -m (extension tool itstool generating l10n XML)
* ??? (Sphinx may be converting directly to the final format data such html for
  each l10n targets from the combination of the master data and the PO data
  without generating l10n RST source.)

This workflow helps to keep up translated output in sync with the master
output.

## The reverse workflow

When you decide to change data format, migration of the PO data is a
non-trivial task even if there is a good conversion tool from the data format
to the new ones.  We need a reverse workflow to generate the PO data from the
master data and the corresponding translated data for any gettext derivative
infrastructure.

This po_combine provides a generic reverse workflow as follows:

    master data ------> POT --> [po_align] --+
                                             +--[po_combine] --> PO
    translated data --> POT --> [po_align] --+

* the POT extractor of each tool infrastructure creates the master POT data by
  placing the converted master data in the normal master data position.
* the POT extractor of each tool infrastructure creates the translated POT data
  by placing the converted translated data in the normal master data position.
* Manually edit the converted master POT data and translated POT data to align
  and match exactly.
* The optional po_align may help debug POT alignment issues.
* The PO data generator po_combine creates the converted PO data from the
  master POT data and translated POT data.

When the master and translated data doesn't use PO system and you want to
migrate to PO based source structure, po_combine is a great help.  I find the
requirement to align and match the master data and the translated data in their
original format is more trouble than doing it on the POT data.  This is very
true when the translated data is based on older master data with many mussing
paragraphs.  For such cases, I see some advantage of po_combine over native PO
generation mechanism such as po4a-gettextize with -l option for po4a.

You can add some extracted markers such as untranslated strings (numbers,
embedded XML tags, ...) to the comment section of POT file before manually edit
them for alignment. This kind of marker strings and the original POT markers
such as "#. type: Content of: <book><chapter><title>" should make the align and
match task easier with this po_combine workflow.  (po_align works for this.)

When you are migrating from an old data format to a new data format with some
conversion program, po_combine can help you to create updated PO file
corresponding to the new translated data with its generic workflow.  The
alignment in the old data format is an optional task if you take this path.
But the better alignment in the old data format reduces the manual alignment
work on the new data format.  For the better alignment in the old format,
disabling features such as ones provided by the add_.../ files or any other
ways to insert translator credits or similar in the old source ensures better
matched old source data.

Different strings (msgid) in master may be translated into a same string
(msgstr) in translation.  This often happens when capitalization or any trivial
typographical differences in master are merged into a same translated string in
translation.  This causes problem for po_combine to function well.  If the
number of extracted strings for the translation is less than the original for
seemingly perfectly aligned data, this is probably the reason.

Also, if the translation misses some tags such as <_:footnote-1/>, then
alignment becomes broken and very hard to identify.  For this, use a command
to check the consistency.

  po_check -i <LANG>.po

Use po_align to ensure easier matching (for po4a) and smooth operation of
po_combine.  It is easier to debug source issues with po_align + po_combine.

When you have perfectly aligned data, the use of the native PO generation
mechanism such as po4a-gettextize with -l option for po4a may have advantage
over the use of po_combine.  (Also, for poxml, split2po facilitates this
functionality.)  You may need to clean the resulting PO using po_clean
and po_rm_fuzzy for this native approach.

TIP: pandoc is a nice document data format conversion tool.

See {}(1) manpage for more.
""".format(prog)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=banner,
        epilog=workflow_notes,
    )
    parser.add_argument(
        "-a", "--aligned",
        action="store_true",
        default=False,
        help="generate aligned but duplicated content for debug",
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        default=False,
        help="verbose output",
    )
    parser.add_argument(
        "master_pot",
        help="Input POT file from the English source",
    )
    parser.add_argument(
        "translated_pot",
        help="Input POT file from the translated source",
    )
    parser.add_argument("output", help="Output PO file")
    opts = parser.parse_args()

    source = poutils.PotData()
    translated = poutils.PotData()
    # The translated file is opened only after the master has been read,
    # and while the master file is still open.
    with open(opts.master_pot, "r") as master_stream:
        source.read_po(file=master_stream, verbose=opts.verbose)
        with open(opts.translated_pot, "r") as translated_stream:
            translated.read_po(file=translated_stream)

    source.normalize()
    translated.normalize()
    source.combine_pots(translated)
    source.clean_msgstr(
        pattern_extracted=r"<screen>",
        pattern_msgid=r"^https?://",
    )

    with open(opts.output, "w") as out_stream:
        source.output_po(file=out_stream, aligned=opts.aligned)