def checkTranslationsForSpam(inputFilePath):
    """Report URLs found in translations that are absent from the template.

    Loads the template catalog at ``inputFilePath`` and every ``.po`` file in
    the same folder whose name, after the locale code, matches the template
    name. Each URL found in a translated string is compared against the URLs
    of the corresponding source string, and every mismatch is printed.

    Arguments given on the command line (``sys.argv[1:]``), if any, restrict
    the check to the listed locale codes.
    """
    print(u"Checking", inputFilePath)
    templateCatalog = Catalog(inputFilePath)

    # If language codes were specified on the command line, filter by those.
    filters = sys.argv[1:]

    l10nFolderPath = os.path.dirname(inputFilePath)
    templateFilename = os.path.basename(inputFilePath)

    # .pot is one letter longer than .po, but the dot that separates the locale
    # code from the rest of the filename in .po files makes up for that.
    charactersToSkip = len(templateFilename)

    # Collect [languageCode, path] pairs of the existing translation catalogs.
    existingTranslationCatalogs = []
    for filename in os.listdir(l10nFolderPath):
        if len(filename) <= 3 or not filename.endswith(".po"):
            continue
        if filename.startswith("long"):
            continue
        languageCode = filename[:-charactersToSkip]
        if filters and languageCode not in filters:
            continue
        # Only keep catalogs that belong to this template.
        if templateFilename[:-4] != filename.split('.')[-2]:
            continue
        existingTranslationCatalogs.append(
            [languageCode, os.path.join(l10nFolderPath, filename)])

    # The backslashes are doubled so that the literal holds no invalid escape
    # sequence; the resulting pattern text is unchanged.
    urlPattern = re.compile(
        u"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    )

    # Check the URLs in translations against the URLs in the translation template.
    for languageCode, pofile in existingTranslationCatalogs:
        translationCatalog = Catalog(pofile)
        for templateMessage in templateCatalog:
            translationMessage = translationCatalog.select_by_key(
                templateMessage.msgctxt, templateMessage.msgid)
            if not translationMessage:
                continue
            templateUrls = urlPattern.findall(templateMessage.msgid)
            # Assert that the same URL is used in both the plural and singular
            # forms. The plural *source* string must be inspected here; the
            # msgstr entries of a template carry no source text.
            if templateMessage.msgid_plural:
                pluralUrls = urlPattern.findall(templateMessage.msgid_plural)
                for url in pluralUrls:
                    if url not in templateUrls:
                        print(
                            u"Different URLs in singular and plural source strings for ‘{}’ in ‘{}’"
                            .format(templateMessage.msgid, inputFilePath))
            for translationString in translationMessage[0].msgstr:
                for translationUrl in urlPattern.findall(translationString):
                    if translationUrl not in templateUrls:
                        print(
                            u"{}: Found the “{}” URL in the translation, which does not match any of the URLs in the translation template: {}"
                            .format(languageCode, translationUrl,
                                    u", ".join(templateUrls)))
Example #2
0
 def icat_w(cat, icat_pack):
     """Return the catalog cached in ``icat_pack[0]``, building it on demand.

     ``icat_pack`` is a one-element mutable sequence used as a cache slot.
     When the slot is empty (``None``), a fresh in-memory catalog is filled
     with the result of ``_msg_invert_cp`` for every message of ``cat``
     (skipping results already present) and stored in the slot.
     """
     if icat_pack[0] is not None:
         return icat_pack[0]

     inverted = Catalog("", create=True, monitored=False)
     for source_msg in cat:
         upprogf()  # progress tick, once per processed message
         flipped = _msg_invert_cp(source_msg)
         if flipped in inverted:
             continue
         inverted.add_last(flipped)
     icat_pack[0] = inverted
     return icat_pack[0]
Example #3
0
def extract_proper_words(path, dict_en, dict_local):
    """Feed unknown proper words of a catalog into ``dict_local``.

    For every message of the catalog at ``path``, the words returned by
    ``proper_words`` for the first translation string are examined. A word
    that matches ``_ent_proper_word`` and is accepted by neither
    ``dict_en.check`` nor ``dict_local.check`` is passed to
    ``dict_local.session_dict``.
    """
    catalog = Catalog(path)

    for message in catalog:
        candidates = proper_words(message.msgstr[0], True,
                                  catalog.accelerator(), message.format)
        for candidate in candidates:
            if not _ent_proper_word.match(candidate):
                continue
            plain = str(candidate)
            # Known to either dictionary: nothing to record.
            if dict_en.check(plain) or dict_local.check(plain):
                continue
            dict_local.session_dict(plain)
def checkTranslationsForSpam(inputFilePath):
    """Report URLs found in translations that are absent from the template.

    Loads the template catalog at ``inputFilePath`` and every ``.po`` file in
    the same folder whose name, after the locale code, matches the template
    name. Each URL found in a translated string is compared against the URLs
    of the corresponding source string, and every mismatch is printed.

    Arguments given on the command line (``sys.argv[1:]``), if any, restrict
    the check to the listed locale codes.
    """
    print(u"Checking", inputFilePath)
    templateCatalog = Catalog(inputFilePath)

    # If language codes were specified on the command line, filter by those.
    filters = sys.argv[1:]

    l10nFolderPath = os.path.dirname(inputFilePath)
    templateFilename = os.path.basename(inputFilePath)

    # .pot is one letter longer than .po, but the dot that separates the locale
    # code from the rest of the filename in .po files makes up for that.
    charactersToSkip = len(templateFilename)

    # Collect [languageCode, path] pairs of the existing translation catalogs.
    existingTranslationCatalogs = []
    for filename in os.listdir(l10nFolderPath):
        if len(filename) <= 3 or not filename.endswith(".po"):
            continue
        if filename.startswith("long"):
            continue
        languageCode = filename[:-charactersToSkip]
        if filters and languageCode not in filters:
            continue
        # Only keep catalogs that belong to this template.
        if templateFilename[:-4] != filename.split('.')[-2]:
            continue
        existingTranslationCatalogs.append(
            [languageCode, os.path.join(l10nFolderPath, filename)])

    # The backslashes are doubled so that the literal holds no invalid escape
    # sequence; the resulting pattern text is unchanged.
    urlPattern = re.compile(
        u"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    )

    # Check the URLs in translations against the URLs in the translation template.
    for languageCode, pofile in existingTranslationCatalogs:
        translationCatalog = Catalog(pofile)
        for templateMessage in templateCatalog:
            translationMessage = translationCatalog.select_by_key(
                templateMessage.msgctxt, templateMessage.msgid)
            if not translationMessage:
                continue
            templateUrls = urlPattern.findall(templateMessage.msgid)
            # Assert that the same URL is used in both the plural and singular
            # forms. The plural *source* string must be inspected here; the
            # msgstr entries of a template carry no source text.
            if templateMessage.msgid_plural:
                pluralUrls = urlPattern.findall(templateMessage.msgid_plural)
                for url in pluralUrls:
                    if url not in templateUrls:
                        print(
                            u"Different URLs in singular and plural source strings for ‘{}’ in ‘{}’"
                            .format(templateMessage.msgid, inputFilePath))
            for translationString in translationMessage[0].msgstr:
                for translationUrl in urlPattern.findall(translationString):
                    if translationUrl not in templateUrls:
                        print(
                            u"{}: Found the “{}” URL in the translation, which does not match any of the URLs in the translation template: {}"
                            .format(languageCode, translationUrl,
                                    u", ".join(templateUrls)))
Example #5
0
def pairs_update_effort (pspecs, quiet=False):
    """Sum the update effort over several pairs of catalog files.

    ``pspecs`` is a sequence of ``(fpaths, vpaths)`` items: ``fpaths`` holds
    the two file paths to compare, and ``vpaths[1]`` is the path shown in
    progress output. Pairs whose files are binary-equal are skipped; the
    remaining pairs are opened as catalogs and passed to
    ``cats_update_effort``, whose results are accumulated.

    When ``quiet`` is false and more than one pair is given, file progress is
    reported through ``init_file_progress``.

    Returns a list with a single tuple
    ``("nntw", <label>, <total as float>, <total formatted with "%.0f">)``.
    """
    update_progress = None
    if len(pspecs) > 1 and not quiet:
        update_progress = init_file_progress([vp[1] for fp, vp in pspecs],
                            addfmt=t_("@info:progress", "Diffing: %(file)s"))
    nntw_total = 0.0
    for fpaths, vpaths in pspecs:
        upprogf = None
        if update_progress:
            upprogf = lambda: update_progress(vpaths[1])
            upprogf()
        # Quick check if files are binary equal.
        if fpaths[0] and fpaths[1] and filecmp.cmp(*fpaths):
            continue
        cats = []
        for fpath in fpaths:
            try:
                cats.append(Catalog(fpath, create=True, monitored=False))
            except Exception:
                # Only genuine errors are reported as parse failures;
                # KeyboardInterrupt and SystemExit propagate untouched.
                error_wcl(_("@info",
                            "Cannot parse catalog '%(file)s'.",
                            file=fpath), norem=[fpath])
        nntw = cats_update_effort(cats[0], cats[1], upprogf)
        nntw_total += nntw
    if update_progress:
        # Called without a path once all pairs are processed.
        update_progress()

    updeff = [
        ("nntw", _("@item", "nominal newly translated words"),
         nntw_total, "%.0f" % nntw_total),
    ]
    return updeff
Example #6
0
    def process_header(self, hdr, cat):
        """Select the correction catalog that corresponds to ``cat``.

        The expected path is ``cat.filename`` with the first occurrence of
        ``self.pd_srch`` replaced by ``self.pd_repl``. ``self.corr_cat`` is
        set to the catalog opened from that path when it is an existing
        file, and to ``None`` otherwise. ``hdr`` is not used.
        """
        # Forget the correction catalog of the previously processed file.
        self.corr_cat = None

        corr_path = cat.filename.replace(self.pd_srch, self.pd_repl, 1)
        if not os.path.isfile(corr_path):
            return

        self.corr_cat = Catalog(corr_path)
Example #7
0
def translate_direct(paths, tsbuilder, options):
    """Translate the source texts of catalogs through a translation service.

    For each catalog collected from ``paths``, the ``msgid`` (and, for plural
    messages, ``msgid_plural``) of every message selected by ``to_translate``
    is sent to the service obtained from ``get_transerv``. Non-empty results
    are written into the ``msgstr`` fields, the message is passed to
    ``decorate``, and the catalog is handed to ``sync_rep``.

    ``options`` supplies ``accel``, ``slang`` (defaulting to ``"en"``) and
    ``tlang``.
    """
    for catpath in collect_catalogs(paths):

        # Gather the messages to translate and their source texts.
        cat = Catalog(catpath)
        if options.accel is not None:  # force explicitly given accelerator
            cat.set_accelerator(options.accel)
        pending_msgs = []
        pending_texts = []
        for msg in cat:
            if not to_translate(msg, options):
                continue
            stripped = MessageUnsafe(msg)
            remove_accel_msg(stripped, cat)
            pending_texts.append(stripped.msgid)
            if msg.msgid_plural is not None:
                pending_texts.append(stripped.msgid_plural)
            pending_msgs.append(msg)

        # Run the collected texts through the translation service.
        source_lang = options.slang or "en"
        transerv = get_transerv(source_lang, options.tlang, cat, cat,
                                tsbuilder)
        if pending_texts:
            texts_tr = transerv.translate(pending_texts)
        else:
            texts_tr = []
        if texts_tr is None:
            warning(
                _("@info",
                  "Translation service failure on '%(file)s'.",
                  file=catpath))
            continue
        for pos, raw_text in enumerate(texts_tr):
            texts_tr[pos] = reduce_for_encoding(raw_text, cat.encoding())

        # Distribute the translated texts back, in collection order.
        singlepls = cat.plural_indices_single()
        for msg in pending_msgs:
            has_plural = msg.msgid_plural is not None
            singular_tr = texts_tr.pop(0)
            plural_tr = texts_tr.pop(0) if has_plural else None
            if not singular_tr:
                continue
            if has_plural:
                for form in range(len(msg.msgstr)):
                    if form in singlepls:
                        msg.msgstr[form] = singular_tr
                    else:
                        msg.msgstr[form] = plural_tr
            else:
                msg.msgstr[0] = singular_tr
            decorate(msg, options)

        sync_rep(cat, pending_msgs)
Example #8
0
def main ():
    """Command-line entry point: rewrap message strings in PO files.

    Parses the command line, assembles the list of catalog files, then for
    each file opens it, applies the wrapping chosen by
    ``select_field_wrapping`` and forces a sync.
    """
    locale.setlocale(locale.LC_ALL, "")

    # Defaults for command line options come from the global config.
    cfgsec = pology_config.section("porewrap")

    # Texts shown by the option parser.
    usage = _(
        "@info command usage",
        "%(cmd)s [options] POFILE...",
        cmd="%prog")
    desc = _(
        "@info command description",
        "Rewrap message strings in PO files.")
    ver = _(
        "@info command version",
        u"%(cmd)s (Pology) %(version)s\n"
        u"Copyright © 2007, 2008, 2009, 2010 "
        u"Chusslove Illich (Часлав Илић) <%(email)s>",
        cmd="%prog", version=version(), email="*****@*****.**")

    # Set up the options and parse the command line.
    parser = ColorOptionParser(usage=usage, description=desc, version=ver)
    parser.add_option(
        "-v", "--verbose",
        action="store_true",
        dest="verbose",
        default=False,
        help=_("@info command line option description",
               "More detailed progress information."))
    add_cmdopt_wrapping(parser)
    add_cmdopt_filesfrom(parser)

    op, fargs = parser.parse_args()

    if not fargs and not op.files_from:
        error(_("@info", "No input files given."))

    # Could use some speedup.
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    # Assemble list of files.
    catalog_paths = collect_paths_cmdline(
        rawpaths=fargs,
        filesfrom=op.files_from,
        respathf=collect_catalogs,
        abort=True)

    # Rewrap all catalogs.
    for catalog_path in catalog_paths:
        if op.verbose:
            report(_("@info:progress", "Rewrapping: %(file)s",
                     file=catalog_path))
        cat = Catalog(catalog_path, monitored=False)
        cat.set_wrapping(select_field_wrapping(cfgsec, cat, op))
        cat.sync(force=True)
Example #9
0
def diff_cats(cat1,
              cat2,
              ecat,
              merge=True,
              colorize=False,
              wrem=True,
              wadd=True,
              noobs=False,
              upprogf=None):
    """Diff two catalogs message by message and add the results to ``ecat``.

    Messages of ``cat1`` and ``cat2`` are paired by ``_pair_msgs``. Each pair
    is diffed with ``_add_msg_diff`` into a temporary catalog, whose messages
    are then appended to ``ecat`` in order. ``upprogf``, when given, is
    called once per diffed pair.

    Returns the sum of the values returned by ``_add_msg_diff``.
    """
    if not upprogf:
        upprogf = lambda: None

    pairs = _pair_msgs(cat1, cat2, merge, wrem, wadd, noobs, upprogf)

    # Pairs that have a new message follow the order of the new catalog.
    # For unpaired old messages, source file renamings are analysed
    # heuristically, and the diffed messages are inserted according to
    # the source references of the old messages.
    with_new = sorted((pair for pair in pairs if pair[1]),
                      key=lambda pair: pair[1].refentry)
    only_old = [pair for pair in pairs if not pair[1]]
    renamed_sources = None
    if only_old:
        renamed_sources = cat2.detect_renamed_sources(cat1)

    # Diffed messages must not go directly into the global ediff catalog,
    # because heuristic insertion would then scatter them. They are first
    # collected in a local catalog and copied over in order afterwards.
    local_ecat = Catalog("", create=True, monitored=False)
    ndiffed = 0
    batches = ((with_new, None), (only_old, renamed_sources))
    for batch, batch_fnsyn in batches:
        for old_msg, new_msg in batch:
            upprogf()
            ndiffed += _add_msg_diff(old_msg, new_msg, local_ecat,
                                     colorize, batch_fnsyn)
    for diffed_msg in local_ecat:
        ecat.add(diffed_msg, len(ecat))

    return ndiffed
Example #10
0
def self_merge_pofile (catpath, compendiums=[],
                       fuzzex=False, minwnex=0, minasfz=0.0, refuzzy=False,
                       cfgsec=None, cmlopt=None):
    """Merge a PO file with a template derived from the file itself.

    Two temporary copies of ``catpath`` are made next to it: a working copy
    and a dummy template. The template has all active messages cleared and
    all obsolete messages removed. The working copy is then merged with the
    template by ``merge_pofile`` and moved over the original; the template
    is deleted.

    ``compendiums``, ``fuzzex``, ``minwnex``, ``minasfz`` and ``refuzzy`` are
    forwarded to ``merge_pofile``; ``cfgsec`` and ``cmlopt`` are forwarded to
    ``select_field_wrapping`` to choose the wrapping policy.
    """
    # Create temporary files for merging.
    ext = ".tmp-selfmerge"
    po_ext = ".po"
    catpath_mod = catpath + ext
    # Only a trailing ".po" is swapped for ".pot". Replacing every ".po"
    # occurrence would also rewrite directory components of the path and
    # point the template at a directory that does not exist.
    if catpath.endswith(po_ext):
        potpath = catpath[:-len(po_ext)] + ".pot" + ext
    else:
        potpath = catpath + ".pot" + ext
    shutil.copyfile(catpath, catpath_mod)
    shutil.copyfile(catpath, potpath)

    # Open catalog for pre-processing.
    cat = Catalog(potpath, monitored=False)

    # Decide wrapping policy.
    wrapping = select_field_wrapping(cfgsec, cat, cmlopt)

    # From the dummy template, clean all active messages and
    # remove all obsolete messages.
    for msg in cat:
        if msg.obsolete:
            cat.remove_on_sync(msg)
        else:
            msg.clear()
    cat.sync()

    # Merge with dummy template.
    merge_pofile(catpath_mod, potpath, update=True, wrapping=wrapping,
                 cmppaths=compendiums, fuzzex=fuzzex,
                 minwnex=minwnex, minasfz=minasfz, refuzzy=refuzzy,
                 abort=True)

    # Overwrite original with temporary catalog.
    shutil.move(catpath_mod, catpath)
    os.unlink(potpath)
Example #11
0
def apply_ediff(op):
    """Apply an embedded diff (ediff) PO file to the catalogs it refers to.

    The ediff is read from ``op.input`` or, when that is not set, from
    standard input. It is split into sections, one per diffed catalog, at
    the header messages identified by the ediff header context. Each section
    is applied to its catalog with ``patch_header`` and ``patch_messages``.
    Rejected parts are collected and, unless ``op.embed`` is set, written to
    a separate rejects catalog.

    Attributes of ``op`` read here: ``input``, ``strip``, ``directory`` and
    ``embed``; ``op`` is also forwarded to the patching helpers.
    """
    # Read the ediff PO.
    dummy_stream_path = "<stdin>"
    if op.input:
        if not os.path.isfile(op.input):
            error(
                _("@info",
                  "Path '%(path)s' is not a file or does not exist.",
                  path=op.input))
        edfpath = op.input
        readfh = None
    else:
        edfpath = dummy_stream_path
        readfh = sys.stdin
    try:
        ecat = Catalog(edfpath, monitored=False, readfh=readfh)
    except Exception:
        error(
            _("@info ediff is shorthand for \"embedded difference\"",
              "Error reading ediff '%(file)s'.",
              file=edfpath))

    # Split ediff by diffed catalog into original and new file paths,
    # header message, and ordinary messages.
    hmsgctxt = ecat.header.get_field_value(EDST.hmsgctxt_field)
    if hmsgctxt is None:
        error(
            _("@info",
              "Header field '%(field)s' is missing in the ediff.",
              field=EDST.hmsgctxt_field))
    edsplits = []
    cehmsg = None
    smsgid = u"\x00"
    ecat.add_last(MessageUnsafe(dict(msgctxt=hmsgctxt,
                                     msgid=smsgid)))  # sentry
    for emsg in ecat:
        if emsg.msgctxt == hmsgctxt:
            if cehmsg:
                # Record previous section.
                edsplits.append((fpaths, cehmsg, cemsgs))
                if emsg.msgid == smsgid:  # end sentry, avoid parsing below
                    break

            # Mine original and new file paths out of header.
            fpaths = []
            for fpath in emsg.msgid.split("\n")[:2]:
                # Strip leading "+ "/"- "
                fpath = fpath[2:]
                # Convert to platform path separators.
                fpath = re.sub(r"/+", os.path.sep, fpath)
                # Remove revision indicator.
                p = fpath.find(EDST.filerev_sep)
                if p >= 0:
                    fpath = fpath[:p]
                # Strip path and append directory as requested.
                if op.strip:
                    preflen = int(op.strip)
                    lst = fpath.split(os.path.sep, preflen)
                    if preflen + 1 == len(lst):
                        fpath = lst[preflen]
                    else:
                        fpath = os.path.basename(fpath)
                else:
                    fpath = os.path.basename(fpath)
                if op.directory and fpath:
                    fpath = os.path.join(op.directory, fpath)
                # All done.
                fpaths.append(fpath)

            cehmsg = emsg
            cemsgs = []
        else:
            cemsgs.append(emsg)

    # Prepare catalog for rejects and merges.
    rcat = Catalog("", create=True, monitored=False, wrapping=ecat.wrapping())
    init_ediff_header(rcat.header, hmsgctxt=hmsgctxt, extitle="rejects")

    # Apply diff to catalogs.
    for fpaths, ehmsg, emsgs in edsplits:
        # Open catalog for patching.
        fpath1, fpath2 = fpaths
        if fpath1:
            # Diff from an existing catalog, open it.
            if not os.path.isfile(fpath1):
                warning(
                    _("@info",
                      "Path '%(path)s' is not a file or does not exist, "
                      "skipping it.",
                      path=fpath1))
                continue
            try:
                cat = Catalog(fpath1)
            except Exception:
                warning(
                    _("@info",
                      "Error reading catalog '%(file)s', skipping it.",
                      file=fpath1))
                continue
        elif fpath2:
            # New catalog added in diff, create it (or open if it exists).
            try:
                mkdirpath(os.path.dirname(fpath2))
                cat = Catalog(fpath2, create=True)
                if cat.created():
                    cat.set_wrapping(ecat.wrapping())
            except Exception:
                if os.path.isfile(fpath2):
                    # fpath1 is empty in this branch; the catalog that
                    # failed to load is fpath2.
                    warning(
                        _("@info",
                          "Error reading catalog '%(file)s', skipping it.",
                          file=fpath2))
                else:
                    warning(
                        _("@info",
                          "Cannot create catalog '%(file)s', skipping it.",
                          file=fpath2))
                continue
        else:
            error(_("@info", "Both catalogs in ediff indicated not to exist."))

        # Do not try to patch catalog with embedded differences
        # (i.e. previously patched using -e).
        if cat.header.get_field_value(EDST.hmsgctxt_field) is not None:
            warning(
                _("@info", "Catalog '%(file)s' already contains "
                  "embedded differences, skipping it.",
                  file=cat.filename))
            continue

        # Do not try to patch catalog if the patch contains
        # unresolved split differences.
        if any(_flag_ediff_to_new in x.flag for x in emsgs):
            warning(
                _("@info", "Patch for catalog '%(file)s' contains unresolved "
                  "split differences, skipping it.",
                  file=cat.filename))
            continue

        # Patch the catalog.
        rejected_ehmsg = patch_header(cat, ehmsg, ecat, op)
        rejected_emsgs_flags = patch_messages(cat, emsgs, ecat, op)
        any_rejected = rejected_ehmsg or rejected_emsgs_flags
        if fpath2 or any_rejected:
            created = cat.created()
            if cat.sync():
                if not created:
                    if any_rejected and op.embed:
                        report(
                            _("@info:progress E is for \"with embedding\"",
                              "Partially patched (E): %(file)s",
                              file=cat.filename))
                    elif any_rejected:
                        report(
                            _("@info:progress",
                              "Partially patched: %(file)s",
                              file=cat.filename))
                    elif op.embed:
                        report(
                            _("@info:progress E is for \"with embedding\"",
                              "Patched (E): %(file)s",
                              file=cat.filename))
                    else:
                        report(
                            _("@info:progress",
                              "Patched: %(file)s",
                              file=cat.filename))
                else:
                    if op.embed:
                        report(
                            _("@info:progress E is for \"with embedding\"",
                              "Created (E): %(file)s",
                              file=cat.filename))
                    else:
                        report(
                            _("@info:progress",
                              "Created: %(file)s",
                              file=cat.filename))
            else:
                pass  #report("unchanged: %s" % cat.filename)
        else:
            # No new path and nothing rejected: the diff removes the catalog.
            os.unlink(fpath1)
            report(_("@info:progress", "Removed: %(file)s", file=fpath1))

        # If there were any rejects and reembedding is not in effect,
        # record the necessary to present them.
        if any_rejected and not op.embed:
            if not rejected_ehmsg:
                # Clean header diff.
                ehmsg.manual_comment = ehmsg.manual_comment[:1]
                ehmsg.msgstr[0] = u""
            rcat.add_last(ehmsg)
            for emsg, flag in rejected_emsgs_flags:
                # Reembed to avoid any conflicts.
                msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg)
                emsg = msg_ediff(msg1_s,
                                 msg2_s,
                                 emsg=msg2_s,
                                 ecat=rcat,
                                 enoctxt=hmsgctxt)
                if flag:
                    emsg.flag.add(flag)
                rcat.add_last(emsg)

    # If there were any rejects, write them out.
    if len(rcat) > 0:
        # Construct paths for embedded diffs of rejects.
        rsuff = "rej"
        if ecat.filename != dummy_stream_path:
            # Insert ".rej" before the last extension of the ediff path.
            rpath = ecat.filename
            p = rpath.rfind(".")
            if p < 0:
                p = len(rpath)
            rpath = rpath[:p] + (".%s" % rsuff) + rpath[p:]
        else:
            rpath = "stdin.%s.po" % rsuff

        rcat.filename = rpath
        rcat.sync(force=True, noobsend=True)
        report(
            _(
                "@info:progress file to which rejected parts of the patch "
                "have been written to",
                "*** Rejects: %(file)s",
                file=rcat.filename))
Example #12
0
def _load_norm_ui_cats (cat, uicpaths, xmlescape):
    """Return the normalized UI catalogs that ``cat`` may draw strings from.

    Catalog names are read from the ``X-Associated-UI-Catalogs*`` header
    fields of ``cat``; ``cat`` itself is put first when it is among
    ``uicpaths`` and not already listed. Every path that ``uicpaths`` maps a
    name to is opened and normalized with ``_norm_ui_cat``, using
    ``_norm_cats_cache`` keyed by ``(xmlescape, path)``. Cache entries not
    used by this call are dropped afterwards.

    The returned list is ordered by decreasing priority.
    """
    # Names of catalogs to draw UI strings from, by decreasing priority
    # (used to resolve references in face of duplicates over catalogs).
    requested = []

    # - catalogs listed in some header fields
    # NOTE: Mention in module docstring when adding/removing fields.
    header_fields = (
        "X-Associated-UI-Catalogs-H",
        "X-Associated-UI-Catalogs",
        "X-Associated-UI-Catalogs-L",
    )
    for field_name in header_fields:
        for field in cat.header.select_fields(field_name):
            # The field value is a list of catalog names; anything from
            # "~~" on is a summit-merging comment and is cut off.
            names_text = field[1].partition("~~")[0]
            requested.extend(names_text.split())

    # - the catalog itself, if among UI catalogs paths and not explicitly given
    if cat.name in uicpaths and cat.name not in requested:
        requested.insert(0, cat.name) # highest priority

    # Make catalog names unique, preserving order.
    seen_names = set()
    ordered_names = []
    for name in requested:
        if name in seen_names:
            continue
        seen_names.add(name)
        ordered_names.append(name)

    # Open and normalize UI catalogs.
    # Cache catalogs for performance.
    uicats = []
    used_keys = set()
    for name in ordered_names:
        paths_for_name = uicpaths.get(name)
        if not paths_for_name:
            warning(_("@info",
                      "UI catalog '%(catname1)s' associated to '%(catname2)s' "
                      "is not among known catalog paths.",
                      catname1=name, catname2=cat.name))
            continue
        for catpath in paths_for_name:
            cache_key = (xmlescape, catpath)
            used_keys.add(cache_key)
            normalized = _norm_cats_cache.get(cache_key)
            if normalized is None:
                raw_cat = Catalog(catpath, monitored=False)
                normalized = _norm_ui_cat(raw_cat, xmlescape)
                _norm_cats_cache[cache_key] = normalized
            uicats.append(normalized)

    # Remove previous catalogs not reused by this call.
    # TODO: Better strategy for removing from cache.
    stale_keys = [key for key in _norm_cats_cache if key not in used_keys]
    for stale_key in stale_keys:
        del _norm_cats_cache[stale_key]

    return uicats
Example #13
0
def _norm_ui_cat (cat, xmlescape):
    """Build a normalized copy of a UI catalog.

    Markup and accelerator markers are removed from every non-obsolete
    message of ``cat``. Messages that end up with the same
    ``(msgctxt, msgid)`` key but different translations get a ``~<hash>``
    tail appended to their context, derived from the MD5 digest of the
    original msgid; if they all share one translation, only the first is
    kept. When ``xmlescape`` is true, all text fields are passed through
    ``_escape_to_xml``.

    Returns a new in-memory catalog whose filename is that of ``cat`` with
    ``~norm`` appended. The messages of ``cat`` are modified in place.

    Raises ``PologyError`` if two different msgids yield the same digest.
    """
    norm_cat = Catalog("", create=True, monitored=False)
    norm_cat.filename = cat.filename + "~norm"

    # Normalize messages and collect them by normalized keys.
    msgs_by_normkey = {}
    for msg in cat:
        if msg.obsolete:
            continue
        orig_msgkey = (msg.msgctxt, msg.msgid)
        remove_markup_msg(msg, cat) # before accelerator removal
        remove_accel_msg(msg, cat) # after markup removal
        normkey = (msg.msgctxt, msg.msgid)
        msgs_by_normkey.setdefault(normkey, []).append((msg, orig_msgkey))

    for msgs in msgs_by_normkey.values():
        # If there are several messages with same normalized key and
        # different translations, add extra disambiguations to context.
        # These disambiguations must not depend on message ordering.
        if len(msgs) > 1:
            # Check equality of translations.
            msgstr0 = u""
            for msg, d1 in msgs:
                if msg.translated:
                    if not msgstr0:
                        msgstr0 = msg.msgstr[0]
                    elif msgstr0 != msg.msgstr[0]:
                        msgstr0 = None
                        break
            if msgstr0 is None: # disambiguation necessary
                tails = set()
                for msg, (octxt, omsgid) in msgs:
                    if msg.msgctxt is None:
                        msg.msgctxt = u""
                    # The hash function operates on bytes. Text is encoded
                    # explicitly so that non-ASCII msgids do not fail;
                    # digests of ASCII-only msgids are unaffected.
                    hash_input = omsgid
                    if not isinstance(hash_input, bytes):
                        hash_input = hash_input.encode("utf-8")
                    tail = hashlib.md5(hash_input).hexdigest()
                    n = 4 # minimum size of the disambiguation tail
                    while tail[:n] in tails:
                        n += 1
                        if n > len(tail):
                            raise PologyError(
                                _("@info",
                                  "Hash function has returned same result "
                                  "for two different strings."))
                    tails.add(tail[:n])
                    msg.msgctxt += "~" + tail[:n]
            else: # all messages have same translation, use first
                msgs = msgs[:1]

        # Escape text fields.
        if xmlescape:
            for msg, d1 in msgs:
                if msg.msgctxt:
                    msg.msgctxt = _escape_to_xml(msg.msgctxt)
                msg.msgid = _escape_to_xml(msg.msgid)
                if msg.msgid_plural:
                    msg.msgid_plural = _escape_to_xml(msg.msgid_plural)
                for i, text in enumerate(msg.msgstr):
                    msg.msgstr[i] = _escape_to_xml(text)

        # Add normalized messages to normalized catalog.
        for msg, d1 in msgs:
            # Messages with neither context nor msgid are left out.
            if msg.msgctxt or msg.msgid:
                norm_cat.add_last(msg)

    return norm_cat
Example #14
0
def translate_parallel(paths, tsbuilder, options):
    """Translate catalogs from the translations of parallel catalogs.

    For each catalog collected from ``paths``, source texts are taken from
    the already translated messages of a parallel catalog (whose path is
    derived by the ``search:replace`` specification in ``options.parcats``)
    and, with lower priority, of the compendium ``options.parcomp``. The
    texts are sent to the service obtained from ``get_transerv`` and the
    results are written into the target messages, which are then passed to
    ``decorate``; the catalog is finally handed to ``sync_rep``.

    Catalogs for which neither a parallel catalog nor a compendium is
    available are skipped.
    """
    pathrepl = options.parcats
    comppath = options.parcomp
    slang = options.slang
    tlang = options.tlang

    # Open the compendium, if one was given.
    ccat = None
    if comppath is not None:
        if not os.path.isfile(comppath):
            error(
                _("@info",
                  "Compendium '%(file)s' does not exist.",
                  file=comppath))
        ccat = Catalog(comppath, monitored=False)

    # Split the search-and-replace specification for parallel paths.
    if pathrepl is not None:
        spec_parts = pathrepl.split(":")
        if len(spec_parts) != 2:
            error(
                _("@info",
                  "Invalid search and replace specification '%(spec)s'.",
                  spec=pathrepl))
        pathsrch, pathrepl = spec_parts

    for catpath in collect_catalogs(paths):

        # Open parallel catalog if it exists.
        pcat = None
        if pathrepl is not None:
            pcatpath = catpath.replace(pathsrch, pathrepl, 1)
            if pcatpath == catpath:
                error(
                    _("@info",
                      "Parallel catalog and target catalog are same files "
                      "for '%(file)s'.",
                      file=catpath))
            if os.path.isfile(pcatpath):
                pcat = Catalog(pcatpath, monitored=False)

        # If there is neither the parallel catalog nor the compendium,
        # skip processing current target catalog.
        if not (pcat or ccat):
            continue

        # Collect messages and texts to translate.
        cat = Catalog(catpath)
        pmsgs, psmsgs, ptexts = [], [], []
        cmsgs, csmsgs, ctexts = [], [], []
        # Priority: parallel catalog, then compendium.
        sources = (
            (pcat, pmsgs, psmsgs, ptexts),
            (ccat, cmsgs, csmsgs, ctexts),
        )
        for msg in cat:
            if not to_translate(msg, options):
                continue
            for scat, msgs, smsgs, texts in sources:
                if not (scat and msg in scat):
                    continue
                smsg = scat[msg]
                if not smsg.translated:
                    continue
                msgs.append(msg)
                smsgs.append(smsg)
                texts.extend(smsg.msgstr)
                break

        # Translate collected texts.
        translated_sets = []
        for texts, scat in ((ptexts, pcat), (ctexts, ccat)):
            transerv = get_transerv(slang, tlang, scat, cat, tsbuilder)
            result = transerv.translate(texts) if texts else []
            if result is None:
                translated_sets = None
                break
            translated_sets.append(result)
        if translated_sets is None:
            warning(
                _("@info",
                  "Translation service failure on '%(file)s'.",
                  file=catpath))
            continue
        ptexts_tr, ctexts_tr = translated_sets

        # Put translated texts into messages.
        # For plural messages, assume 1-1 match to parallel language.
        targets = (
            (pmsgs, psmsgs, ptexts_tr),
            (cmsgs, csmsgs, ctexts_tr),
        )
        for msgs, smsgs, texts in targets:
            for msg, smsg in zip(msgs, smsgs):
                # Take one translated text per form of the source message.
                form_texts = []
                for _unused in range(len(smsg.msgstr)):
                    form_texts.append(
                        reduce_for_encoding(texts.pop(0), cat.encoding()))
                for i in range(len(msg.msgstr)):
                    # A missing or empty text for form i falls back to the
                    # text of the last form.
                    msg.msgstr[i] = ((i < len(form_texts) and form_texts[i])
                                     or form_texts[-1])
                    decorate(msg, options)

        sync_rep(cat, pmsgs + cmsgs)
Example #15
0
def hybdl(path, path0, accnohyb=False):
    """Hybridize the translations in one catalog against an original catalog.

    Every message of the catalog at ``path`` that is not flagged "no-hybdl"
    is compared with the same message in the catalog at ``path0``. Where the
    translations differ, each text is replaced by ``tohi(text0, text,
    parthyb=True)``. Unless ``accnohyb`` is true, the result is accepted only
    if the inverse call ``tohi(text, text0, parthyb=True)`` yields the same
    texts; otherwise the message is reported and counted as stopped.

    path: path of the catalog to modify.
    path0: path of the original catalog (opened unmonitored).
    accnohyb: if true, accept the hybridized text without the inverse check.

    Returns the number of hybridized messages. If any message was stopped,
    the catalog is not synced and 0 is returned.
    """

    cat = Catalog(path)
    cat0 = Catalog(path0, monitored=False)

    nhybridized = 0
    nstopped = 0
    for msg in cat:

        if "no-hybdl" in manc_parse_flag_list(msg, "|"):
            continue

        # Unembed diff if message was diffed for review.
        # Replace ediff with manual review flag.
        # The flags are collected first: removing from msg.flag while
        # iterating over it could skip flags (or fail, for a real set).
        ediff_flags = [flag for flag in msg.flag if flag.startswith("ediff")]
        for flag in ediff_flags:
            msg.flag.remove(flag)
        if ediff_flags:
            msg_ediff_to_new(msg, msg)
            msg.flag.add(u"reviewed")

        # Fetch original message.
        msg0 = cat0.get(msg)
        if msg0 is None:
            warning_on_msg(
                _("@info", "Message does not exist in the original catalog."),
                msg, cat)
            nstopped += 1
            continue
        if len(msg.msgstr) != len(msg0.msgstr):
            warning_on_msg(
                _(
                    "@info", "Number of translations not same as in "
                    "the original message."), msg, cat)
            nstopped += 1
            continue
        if msg.msgstr == msg0.msgstr:
            # No changes, nothing new to hybridize.
            continue

        # Hybridize translation.
        textsh = []
        textshinv = []
        for text0, text in zip(msg0.msgstr, msg.msgstr):
            textsh.append(tohi(text0, text, parthyb=True))
            if not accnohyb:
                # Inverse direction, used below as a consistency check.
                textshinv.append(tohi(text, text0, parthyb=True))
        if accnohyb or textsh == textshinv:
            # textsh has exactly one entry per msgstr (lengths checked above).
            for i, texth in enumerate(textsh):
                msg.msgstr[i] = texth
            nhybridized += 1
        else:
            # The two directions disagree: show their difference and
            # leave the message untouched.
            nstopped += 1
            msgh = MessageUnsafe(msg)
            msgh.msgstr = textsh
            msghinv = MessageUnsafe(msg)
            msghinv.msgstr = textshinv
            msg_ediff(msghinv, msgh, emsg=msgh, colorize=True)
            report_msg_content(msgh, cat, delim=("-" * 20))

    if nstopped == 0:
        if cat.sync():
            report("! %s (%d)" % (path, nhybridized))
    else:
        warning(
            n_("@info", "%(num)d message in '%(file)s' cannot be "
               "cleanly hybridized.",
               "%(num)d messages in '%(file)s' cannot be "
               "cleanly hybridized.",
               num=nstopped,
               file=path))
        # Nothing was written, so nothing counts as hybridized.
        nhybridized = 0

    return nhybridized
def generateLongStringTranslationFromPotIntoPo(inputFilePath, outputFilePath):
    """Build a "long strings" catalog from a template and its translations.

    The catalog at outputFilePath is created (or truncated) and filled with
    the messages of the template at inputFilePath. Every .po file in the
    template's folder that belongs to this template is then merged in:
    whenever a translation is longer than the string currently stored for
    the same message, the translation replaces it.

    inputFilePath: path of the .pot template.
    outputFilePath: path of the catalog to write.

    Locale codes given on the command line (sys.argv[1:]) restrict which
    .po files are merged.
    """

    templateCatalog = Catalog(inputFilePath)
    longStringCatalog = Catalog(outputFilePath, create=True, truncate=True)

    # Fill catalog with English strings.
    for message in templateCatalog:
        longStringCatalog.add(message)

    # If language codes were specified on the command line, filter by those.
    filters = sys.argv[1:]

    # Load existing translation catalogs.
    l10nFolderPath = os.path.dirname(inputFilePath)
    templateFileName = os.path.basename(inputFilePath)
    templateName = templateFileName[:-4]  # minus ".pot"

    # .pot is one letter longer than .po, but the dot that separates the locale
    # code from the rest of the filename in .po files makes up for that.
    charactersToSkip = len(templateFileName)

    existingTranslationCatalogs = []
    for filename in os.listdir(l10nFolderPath):
        if len(filename) <= 3 or not filename.endswith(".po"):
            continue
        if filename.startswith("long"):
            # Skip previously generated long-string catalogs.
            continue
        if filters and filename[:-charactersToSkip] not in filters:
            continue
        if filename.split('.')[-2] == templateName:
            existingTranslationCatalogs.append(
                os.path.join(l10nFolderPath, filename))

    # If any existing translation is longer than what is stored so far,
    # use that instead.
    for pofile in existingTranslationCatalogs:
        print(u"Merging", pofile)
        translationCatalog = Catalog(pofile)
        for longStringCatalogMessage in longStringCatalog:
            translationMessage = translationCatalog.select_by_key(
                longStringCatalogMessage.msgctxt,
                longStringCatalogMessage.msgid)
            if not translationMessage:
                continue

            translatedStrings = translationMessage[0].msgstr
            currentStrings = longStringCatalogMessage.msgstr

            if not longStringCatalogMessage.msgid_plural:
                if len(translatedStrings[0]) > len(currentStrings[0]):
                    longStringCatalogMessage.msgstr = translatedStrings
                continue

            translatedSingular = translatedStrings[0]
            if len(translatedStrings) > 1:
                translatedPlural = translatedStrings[1]
            else:
                translatedPlural = translatedSingular

            currentSingular = currentStrings[0]
            currentPlural = ""  # There might be between 0 and infinite plural forms.
            for currentString in currentStrings[1:]:
                if len(currentString) > len(currentPlural):
                    currentPlural = currentString

            # Take the translation wherever it is the longer one. The
            # comparison used to run the other way round, so plural messages
            # (which start out empty) were never filled from translations.
            changed = False
            if len(translatedSingular) > len(currentSingular):
                currentSingular = translatedSingular
                changed = True
            if len(translatedPlural) > len(currentPlural):
                currentPlural = translatedPlural
                changed = True

            if changed:
                longStringCatalogMessage.msgstr = [
                    currentSingular, currentPlural
                ]

    longStringCatalog.set_encoding("utf-8")
    longStringCatalog.sync()
Example #17
0
def main():
    """Run the posieve command line.

    Reads option defaults from the "posieve" section of the global
    configuration, parses the command line, loads the requested sieve
    modules, assembles sieve parameters, creates the sieves, and then
    iterates over the collected catalogs applying header sieves.

    Several informational options (--list-options, --list-sieve-names,
    --list-sieves, --list-sieve-params, --help-sieves, --issued-params)
    report and terminate through sys.exit(0) before any catalog is opened.
    """

    locale.setlocale(locale.LC_ALL, "")

    # Get defaults for command line options from global config.
    cfgsec = pology_config.section("posieve")
    def_do_skip = cfgsec.boolean("skip-on-error", True)
    def_msgfmt_check = cfgsec.boolean("msgfmt-check", False)
    def_skip_obsolete = cfgsec.boolean("skip-obsolete", False)

    # Setup options and parse the command line.
    usage = _("@info command usage",
              "%(cmd)s [OPTIONS] SIEVE [POPATHS...]",
              cmd="%prog")
    desc = _(
        "@info command description",
        "Apply sieves to PO paths, which may be either single PO files or "
        "directories to search recursively for PO files. "
        "Some of the sieves only examine PO files, while others "
        "modify them as well. "
        "The first non-option argument is the sieve name; "
        "a list of several comma-separated sieves can be given too.")
    ver = _("@info command version", u"%(cmd)s (Pology) %(version)s\n"
            u"Copyright © 2007, 2008, 2009, 2010 "
            u"Chusslove Illich (Часлав Илић) &lt;%(email)s&gt;",
            cmd="%prog",
            version=version(),
            email="*****@*****.**")

    opars = ColorOptionParser(usage=usage, description=desc, version=ver)
    opars.add_option(
        "-a",
        "--announce-entry",
        action="store_true",
        dest="announce_entry",
        default=False,
        help=_("@info command line option description",
               "Announce that header or message is just about to be sieved."))
    opars.add_option("-b",
                     "--skip-obsolete",
                     action="store_true",
                     dest="skip_obsolete",
                     default=def_skip_obsolete,
                     help=_("@info command line option description",
                            "Do not sieve obsolete messages."))
    opars.add_option(
        "-c",
        "--msgfmt-check",
        action="store_true",
        dest="msgfmt_check",
        default=def_msgfmt_check,
        help=_("@info command line option description",
               "Check catalogs by %(cmd)s and skip those which do not pass.",
               cmd="msgfmt -c"))
    opars.add_option("-u",
                     "--single-entry",
                     metavar=_("@info command line value placeholder",
                               "ENTRY_NUMBER"),
                     action="store",
                     dest="single_entry",
                     default=0,
                     help=_("@info command line option description",
                            "Only perform the check on this ENTRY_NUMBER."))
    opars.add_option(
        "--force-sync",
        action="store_true",
        dest="force_sync",
        default=False,
        help=_("@info command line option description",
               "Force rewriting of all messages, whether modified or not."))
    opars.add_option("-H",
                     "--help-sieves",
                     action="store_true",
                     dest="help_sieves",
                     default=False,
                     help=_("@info command line option description",
                            "Show help for applied sieves."))
    opars.add_option("--issued-params",
                     action="store_true",
                     dest="issued_params",
                     default=False,
                     help=_(
                         "@info command line option description",
                         "Show all issued sieve parameters "
                         "(from command line and user configuration)."))
    opars.add_option("-l",
                     "--list-sieves",
                     action="store_true",
                     dest="list_sieves",
                     default=False,
                     help=_("@info command line option description",
                            "List available internal sieves."))
    opars.add_option("--list-options",
                     action="store_true",
                     dest="list_options",
                     default=False,
                     help=_("@info command line option description",
                            "List the names of available options."))
    opars.add_option("--list-sieve-names",
                     action="store_true",
                     dest="list_sieve_names",
                     default=False,
                     help=_("@info command line option description",
                            "List the names of available internal sieves."))
    opars.add_option("--list-sieve-params",
                     action="store_true",
                     dest="list_sieve_params",
                     default=False,
                     help=_("@info command line option description",
                            "List the parameters known to issued sieves."))
    opars.add_option("-m",
                     "--output-modified",
                     metavar=_("@info command line value placeholder", "FILE"),
                     action="store",
                     dest="output_modified",
                     default=None,
                     help=_("@info command line option description",
                            "Output names of modified files into FILE."))
    opars.add_option("--no-skip",
                     action="store_false",
                     dest="do_skip",
                     default=def_do_skip,
                     help=_(
                         "@info command line option description",
                         "Do not try to skip catalogs which signal errors."))
    opars.add_option("--no-sync",
                     action="store_false",
                     dest="do_sync",
                     default=True,
                     help=_("@info command line option description",
                            "Do not write any modifications to catalogs."))
    opars.add_option("-q",
                     "--quiet",
                     action="store_true",
                     dest="quiet",
                     default=False,
                     help=_(
                         "@info command line option description",
                         "Do not display any progress info "
                         "(does not influence sieves themselves)."))
    opars.add_option("-s",
                     metavar=_("@info command line value placeholder",
                               "NAME[:VALUE]"),
                     action="append",
                     dest="sieve_params",
                     default=[],
                     help=_("@info command line option description",
                            "Pass a parameter to sieves."))
    opars.add_option(
        "-S",
        metavar=_("@info command line value placeholder", "NAME[:VALUE]"),
        action="append",
        dest="sieve_no_params",
        default=[],
        help=_(
            "@info command line option description",
            "Remove a parameter to sieves "
            "(e.g. if it was issued through user configuration)."))
    opars.add_option("-v",
                     "--verbose",
                     action="store_true",
                     dest="verbose",
                     default=False,
                     help=_("@info command line option description",
                            "Output more detailed progress information."))
    add_cmdopt_filesfrom(opars)
    add_cmdopt_incexc(opars)
    add_cmdopt_colors(opars)

    (op, free_args) = opars.parse_args(str_to_unicode(sys.argv[1:]))

    if op.list_options:
        report(list_options(opars))
        sys.exit(0)

    if len(free_args) < 1 and not (op.list_sieves or op.list_sieve_names):
        error(_("@info", "No sieve to apply given."))

    op.raw_sieves = []
    op.raw_paths = []
    # free_args holds the sieve specification followed by the paths,
    # so more than two entries means more than one path.
    if len(free_args) > 2 and op.single_entry != 0:
        error(
            _("@info",
              "With single entry mode, you can only give one input file."))

    if len(free_args) >= 1:
        op.raw_sieves = free_args[0]
        op.raw_paths = free_args[1:]

    # Could use some speedup.
    # psyco is optional; its absence is silently ignored.
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    set_coloring_globals(ctype=op.coloring_type, outdep=(not op.raw_colors))

    # Dummy-set all internal sieves as requested if sieve listing required.
    sieves_requested = []
    if op.list_sieves or op.list_sieve_names:
        # Global sieves.
        modpaths = glob.glob(os.path.join(datadir(), "sieve", "[a-z]*.py"))
        modpaths.sort()
        for modpath in modpaths:
            sname = os.path.basename(modpath)[:-3]  # minus .py
            sname = sname.replace("_", "-")
            sieves_requested.append(sname)
        # Language-specific sieves.
        modpaths = glob.glob(
            os.path.join(datadir(), "lang", "*", "sieve", "[a-z]*.py"))
        modpaths.sort()
        for modpath in modpaths:
            sname = os.path.basename(modpath)[:-3]  # minus .py
            sname = sname.replace("_", "-")
            # The language code is the directory two levels above the module.
            lang = os.path.basename(os.path.dirname(os.path.dirname(modpath)))
            sieves_requested.append(lang + ":" + sname)

    # No need to load and setup sieves if only listing sieve names requested.
    if op.list_sieve_names:
        report("\n".join(sieves_requested))
        sys.exit(0)

    # Load sieve modules from supplied names in the command line.
    if not sieves_requested:
        sieves_requested = op.raw_sieves.split(",")
    sieve_modules = []
    for sieve_name in sieves_requested:
        # Resolve sieve file.
        if not sieve_name.endswith(".py"):
            # One of internal sieves.
            if ":" in sieve_name:
                # Language-specific internal sieve.
                lang, name = sieve_name.split(":")
                sieve_path_base = os.path.join("lang", lang, "sieve", name)
            else:
                sieve_path_base = os.path.join("sieve", sieve_name)
            sieve_path_base = sieve_path_base.replace("-", "_") + ".py"
            sieve_path = os.path.join(datadir(), sieve_path_base)
        else:
            # Sieve name is its path.
            sieve_path = sieve_name
        try:
            sieve_file = open(unicode_to_str(sieve_path))
            # ...unicode_to_str because of exec below.
        except IOError:
            error(_("@info", "Cannot load sieve '%(file)s'.", file=sieve_path))
        # Load file into new module.
        sieve_mod_name = "sieve_" + str(len(sieve_modules))
        sieve_mod = imp.new_module(sieve_mod_name)
        # NOTE(review): this executes the sieve file's code; for names ending
        # in .py the path is taken directly from the command line.
        exec sieve_file in sieve_mod.__dict__
        sieve_file.close()
        sys.modules[sieve_mod_name] = sieve_mod  # to avoid garbage collection
        sieve_modules.append((sieve_name, sieve_mod))
        if not hasattr(sieve_mod, "Sieve"):
            error(
                _("@info",
                  "Module '%(file)s' does not define %(classname)s class.",
                  file=sieve_path,
                  classname="Sieve"))

    # Setup sieves (description, known parameters...)
    pp = ParamParser()
    snames = []
    for name, mod in sieve_modules:
        scview = pp.add_subcmd(name)
        # setup_sieve is optional in a sieve module.
        if hasattr(mod, "setup_sieve"):
            mod.setup_sieve(scview)
        snames.append(name)

    # If info on sieves requested, report and exit.
    if op.list_sieves:
        report(_("@info", "Available internal sieves:"))
        report(pp.listcmd(snames))
        sys.exit(0)
    elif op.list_sieve_params:
        params = set()
        for scview in pp.cmdviews():
            params.update(scview.params(addcol=True))
        report("\n".join(sorted(params)))
        sys.exit(0)
    elif op.help_sieves:
        report(_("@info", "Help for sieves:"))
        report("")
        report(pp.help(snames))
        sys.exit(0)

    # Prepare sieve parameters for parsing.
    sieve_params = list(op.sieve_params)
    # - append parameters according to configuration
    sieve_params.extend(read_config_params(pp.cmdviews(), sieve_params))
    # - remove parameters according to command line
    if op.sieve_no_params:
        sieve_params_mod = []
        for parspec in sieve_params:
            # Only the parameter name (the part before the first colon)
            # is compared with the names given by -S.
            if parspec.split(":", 1)[0] not in op.sieve_no_params:
                sieve_params_mod.append(parspec)
        sieve_params = sieve_params_mod

    # If assembly of issued parameters requested, report and exit.
    if op.issued_params:
        escparams = []
        for parspec in sieve_params:
            if ":" in parspec:
                param, value = parspec.split(":", 1)
                escparam = "%s:%s" % (param, escape_sh(value))
            else:
                escparam = parspec
            escparams.append(escparam)
        fmtparams = " ".join(["-s%s" % x for x in sorted(escparams)])
        if fmtparams:
            report(fmtparams)
        sys.exit(0)

    # Parse sieve parameters.
    sparams, nacc_params = pp.parse(sieve_params, snames)
    if nacc_params:
        error(
            _("@info", "Parameters not accepted by any of issued subcommands: "
              "%(paramlist)s.",
              paramlist=format_item_list(nacc_params)))

    # ========================================
    # FIXME: Think of something less ugly.
    # Add as special parameter to each sieve:
    # - root paths from which the catalogs are collected
    # - whether destination independent coloring is in effect
    # - test function for catalog selection
    root_paths = []
    if op.raw_paths:
        root_paths.extend(op.raw_paths)
    if op.files_from:
        for ffpath in op.files_from:
            root_paths.extend(collect_paths_from_file(ffpath))
    # With no paths given at all, fall back to the current directory.
    if not op.raw_paths and not op.files_from:
        root_paths = ["."]
    is_cat_included = build_path_selector(incnames=op.include_names,
                                          incpaths=op.include_paths,
                                          excnames=op.exclude_names,
                                          excpaths=op.exclude_paths)
    for p in sparams.values():
        p.root_paths = root_paths
        p.raw_colors = op.raw_colors
        p.is_cat_included = is_cat_included
    # ========================================

    # Create sieves.
    sieves = []
    for name, mod in sieve_modules:
        sieves.append(mod.Sieve(sparams[name]))

    # Get the message monitoring indicator from the sieves.
    # Monitor unless all sieves have requested otherwise.
    use_monitored = False
    for sieve in sieves:
        if getattr(sieve, "caller_monitored", True):
            use_monitored = True
            break
    if op.verbose and not use_monitored:
        report(_("@info:progress", "--> Not monitoring messages."))

    # Get the sync indicator from the sieves.
    # Sync unless all sieves have requested otherwise,
    # and unless syncing is disabled globally in command line.
    do_sync = False
    for sieve in sieves:
        if getattr(sieve, "caller_sync", True):
            do_sync = True
            break
    if not op.do_sync:
        do_sync = False
    if op.verbose and not do_sync:
        report(_("@info:progress", "--> Not syncing after sieving."))

    # Open in header-only mode if no sieve has message processor.
    # Categorize sieves by the presence of message/header processors.
    use_headonly = True
    header_sieves = []
    header_sieves_last = []
    message_sieves = []
    for sieve in sieves:
        if hasattr(sieve, "process"):
            use_headonly = False
            message_sieves.append(sieve)
        if hasattr(sieve, "process_header"):
            header_sieves.append(sieve)
        if hasattr(sieve, "process_header_last"):
            header_sieves_last.append(sieve)
    if op.verbose and use_headonly:
        report(_("@info:progress",
                 "--> Opening catalogs in header-only mode."))

    # Collect catalog paths.
    fnames = collect_paths_cmdline(rawpaths=op.raw_paths,
                                   incnames=op.include_names,
                                   incpaths=op.include_paths,
                                   excnames=op.exclude_names,
                                   excpaths=op.exclude_paths,
                                   filesfrom=op.files_from,
                                   elsecwd=True,
                                   respathf=collect_catalogs,
                                   abort=True)

    # Choose how problems are reported: through the warning functions when
    # skipping is enabled, through the error functions otherwise.
    if op.do_skip:
        errwarn = warning
        errwarn_on_msg = warning_on_msg
    else:
        errwarn = error
        errwarn_on_msg = error_on_msg

    # Prepare inline progress indicator.
    # update_progress is only defined (and only used below) when not quiet.
    if not op.quiet:
        update_progress = init_file_progress(fnames,
                                             addfmt=t_("@info:progress",
                                                       "Sieving: %(file)s"))

    # Sieve catalogs.
    modified_files = []
    for fname in fnames:
        if op.verbose:
            report(_("@info:progress", "Sieving %(file)s...", file=fname))
        elif not op.quiet:
            update_progress(fname)

        if op.msgfmt_check:
            # Run "msgfmt -c" on the catalog, discarding the compiled output.
            d1, oerr, ret = collect_system(
                ["msgfmt", "-o", "/dev/null", "-c", fname])
            if ret != 0:
                oerr = oerr.strip()
                errwarn(
                    _("@info:progress", "%(file)s: %(cmd)s check failed:\n"
                      "%(msg)s",
                      file=fname,
                      cmd="msgfmt -c",
                      msg=oerr))
                warning(
                    _("@info:progress",
                      "Skipping catalog due to syntax check failure."))
                continue

        try:
            cat = Catalog(fname,
                          monitored=use_monitored,
                          headonly=use_headonly,
                          single_entry=int(op.single_entry))
        except CatalogSyntaxError, e:
            errwarn(
                _("@info:progress",
                  "%(file)s: Parsing failed: %(msg)s",
                  file=fname,
                  msg=e))
            warning(
                _("@info:progress",
                  "Skipping catalog due to parsing failure."))
            continue

        skip = False
        # First run all header sieves.
        if header_sieves and op.announce_entry:
            report(
                _("@info:progress",
                  "Sieving header of %(file)s...",
                  file=fname))
        for sieve in header_sieves:
            try:
                ret = sieve.process_header(cat.header, cat)
            except SieveCatalogError, e:
                errwarn(
                    _("@info:progress",
                      "%(file)s:header: Sieving failed: %(msg)s",
                      file=fname,
                      msg=e))
                skip = True
                break
            # Any return value other than None or 0 stops the header sieves.
            if ret not in (None, 0):
                break
        # NOTE(review): the body ends here; skip, message_sieves,
        # header_sieves_last, do_sync and modified_files are set above but
        # never used in the visible code -- the remainder of this function
        # appears to be missing from this file.
Example #18
0
def generateTemplatesForMessagesFile(messagesFilePath):
    """Generate translation templates as described by a messages JSON file.

    The JSON file holds a list of template settings. For each entry not
    marked with "skip": "yes", a catalog is created (or truncated) at the
    entry's "output" path, relative to the folder of the JSON file. Its
    header is filled from the "project" and "copyrightHolder" settings, and
    every rule in "rules" runs its extractor over the input root to collect
    messages. A message that is already present only gains another source
    location.

    messagesFilePath: path of the JSON settings file.
    """

    with open(messagesFilePath, 'r') as fileObject:
        settings = json.load(fileObject)

    rootPath = os.path.dirname(messagesFilePath)

    for templateSettings in settings:
        if templateSettings.get("skip") == "yes":
            continue

        inputRootPath = rootPath
        if "inputRoot" in templateSettings:
            inputRootPath = os.path.join(rootPath,
                                         templateSettings["inputRoot"])

        template = Catalog(os.path.join(rootPath, templateSettings["output"]),
                           create=True,
                           truncate=True)
        h = template.update_header(
            templateSettings["project"],
            "Translation template for %project.",
            "Copyright © " + "2014" + " " +
            templateSettings["copyrightHolder"],
            "This file is distributed under the same license as the %project project.",
            plforms="nplurals=2; plural=(n != 1);")
        # A template carries no translator or language information.
        for fieldName in ("Report-Msgid-Bugs-To", "Last-Translator",
                          "Language-Team", "Language"):
            h.remove_field(fieldName)
        h.author = Monlist()

        for rule in templateSettings["rules"]:
            if rule.get("skip") == "yes":
                continue

            options = rule.get("options", {})
            # The extractor class is looked up by name in extractors.extractors.
            extractorName = rule["extractor"]
            extractorModule = __import__("extractors.extractors", {}, {},
                                         [extractorName])
            extractorClass = getattr(extractorModule, extractorName)
            extractor = extractorClass(inputRootPath, rule["filemasks"],
                                       options)
            formatFlag = options.get("format")

            for message, plural, context, location, comments in extractor.run():
                # Only flag messages that actually contain a "%".
                # (The in operator replaces string.find(), which no longer
                # exists in Python 3.)
                if formatFlag and "%" in message:
                    flags = [formatFlag]
                else:
                    flags = None
                msg = Message({
                    "msgid": message,
                    "msgid_plural": plural,
                    "msgctxt": context,
                    "auto_comment": comments,
                    "flag": flags,
                    "source": [location]
                })
                existingMessage = template.get(msg)
                if existingMessage:
                    existingMessage.source.append(Monpair(location))
                else:
                    template.add(msg)

        template.set_encoding("utf-8")
        template.sync(fitplural=True)
        print(u"Generated \"{}\" with {} messages.".format(
            templateSettings["output"], len(template)))
Example #19
0
def unembed_ediff(path, all=False, old=False):
    """Remove embedded diffs from the messages of the catalog at ``path``.

    Messages carrying one of the flags in ``_flags_all`` have that flag
    removed and are then resolved:

    - messages flagged as no-match or to-new are removed on sync;
    - the message whose context equals the embedded header context is
      turned back into the catalog header;
    - any other message is replaced by one side of its diff pair, or
      removed on sync if that side does not exist.

    path: path of the catalog to process.
    all: if true, also process messages that carry no ediff flag.
    old: if true, resolve to the old side of each diff instead of the new.

    The function returns early, without syncing, when unembedding would
    produce a duplicate header or a duplicate message. A catalog that cannot
    be read is skipped with a warning.
    """

    try:
        cat = Catalog(path)
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        warning(
            _("@info",
              "Error reading catalog '%(file)s', skipping it.",
              file=path))
        return

    hmsgctxt = cat.header.get_field_value(EDST.hmsgctxt_field)
    if hmsgctxt is not None:
        cat.header.remove_field(EDST.hmsgctxt_field)

    uehmsg = None
    unembedded = {}
    for msg in cat:
        # Strip all ediff flags; the last one found decides the handling.
        ediff_flag = None
        for flag in _flags_all:
            if flag in msg.flag:
                ediff_flag = flag
                msg.flag.remove(flag)
        if not ediff_flag and not all:
            continue
        if ediff_flag in (_flag_ediff_no_match, _flag_ediff_to_new):
            # Throw away fully rejected embeddings, i.e. reject the patch.
            # For split-difference embeddings, throw away the current-to-new;
            # this effectively rejects the patch, which is safest thing to do.
            cat.remove_on_sync(msg)
        elif hmsgctxt is not None and msg.msgctxt == hmsgctxt:
            if uehmsg:
                warning_on_msg(
                    _("@info", "Unembedding results in duplicate header, "
                      "previous header at %(line)d(#%(entry)d); "
                      "skipping it.",
                      line=uehmsg.refline,
                      entry=uehmsg.refentry), msg, cat)
                return
            msg_ediff_to_x = msg_ediff_to_old if old else msg_ediff_to_new
            hmsg = msg_ediff_to_x(clear_header_metadata(msg))
            # Only install the header if it resolved to something non-empty.
            if hmsg.msgstr and hmsg.msgstr[0]:
                cat.header = Header(hmsg)
            cat.remove_on_sync(msg)
            uehmsg = msg
        else:
            msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(msg)
            # Pick the side to keep: old (msg1) or new (msg2).
            tmsg = msg1 if old else msg2
            if tmsg is not None:
                if tmsg.key in unembedded:
                    msg_p = unembedded[tmsg.key]
                    warning_on_msg(
                        _("@info", "Unembedding results in "
                          "duplicate message, previous message "
                          "at %(line)d(#%(entry)d); skipping it.",
                          line=msg_p.refline,
                          entry=msg_p.refentry), msg, cat)
                    return
                # Write the selected side. This used to always write msg2,
                # which ignored old=True and could pass None when only the
                # old side existed.
                msg.set(Message(tmsg))
                unembedded[tmsg.key] = msg
            else:
                cat.remove_on_sync(msg)

    if cat.sync():
        report(_("@info:progress", "Unembedded: %(file)s", file=cat.filename))
def generateLongStringTranslationFromPotIntoPo(inputFilePath, outputFilePath):
    """Write a catalog holding the longest known string for each message.

    The output catalog is created (or truncated) and seeded with the
    messages of the .pot template. Each .po file that sits next to the
    template and belongs to it is then consulted, and any translation longer
    than the string currently stored for that message takes its place.

    inputFilePath: path of the .pot template.
    outputFilePath: path of the catalog to write.

    If locale codes are given on the command line (sys.argv[1:]), only the
    .po files of those locales are used.
    """

    templateCatalog = Catalog(inputFilePath)
    longStringCatalog = Catalog(outputFilePath, create=True, truncate=True)

    # Fill catalog with English strings.
    for message in templateCatalog:
        longStringCatalog.add(message)

    # If language codes were specified on the command line, filter by those.
    filters = sys.argv[1:]

    # Load existing translation catalogs.
    existingTranslationCatalogs = []
    l10nFolderPath = os.path.dirname(inputFilePath)
    inputFileName = os.path.basename(inputFilePath)

    # .pot is one letter longer than .po, but the dot that separates the locale
    # code from the rest of the filename in .po files makes up for that.
    charactersToSkip = len(inputFileName)

    for filename in os.listdir(l10nFolderPath):
        isTranslation = (len(filename) > 3 and filename[-3:] == ".po"
                         and filename[:4] != "long")
        if not isTranslation:
            continue
        if filters and filename[:-charactersToSkip] not in filters:
            continue
        # The component before ".po" must be the template's base name.
        if inputFileName[:-4] == filename.split('.')[-2]:
            existingTranslationCatalogs.append(os.path.join(l10nFolderPath, filename))

    # If any existing translation is longer than the stored string, use that instead.
    for pofile in existingTranslationCatalogs:
        print(u"Merging", pofile)
        translationCatalog = Catalog(pofile)
        for longStringCatalogMessage in longStringCatalog:
            translationMessage = translationCatalog.select_by_key(longStringCatalogMessage.msgctxt, longStringCatalogMessage.msgid)
            if not translationMessage:
                continue

            if not longStringCatalogMessage.msgid_plural:
                if len(translationMessage[0].msgstr[0]) > len(longStringCatalogMessage.msgstr[0]):
                    longStringCatalogMessage.msgstr = translationMessage[0].msgstr
                continue

            # Strings offered by this translation.
            candidateSingularString = translationMessage[0].msgstr[0]
            candidatePluralString = translationMessage[0].msgstr[1] if len(translationMessage[0].msgstr) > 1 else candidateSingularString

            # Longest strings stored so far.
            longestSingularString = longStringCatalogMessage.msgstr[0]
            longestPluralString = "" # There might be between 0 and infinite plural forms.
            for storedString in longStringCatalogMessage.msgstr[1:]:
                if len(storedString) > len(longestPluralString):
                    longestPluralString = storedString

            # Previously the two roles were swapped, so an update happened
            # only when the stored string was already the longer one and
            # plural messages never received any translation.
            changed = False
            if len(candidateSingularString) > len(longestSingularString):
                longestSingularString = candidateSingularString
                changed = True
            if len(candidatePluralString) > len(longestPluralString):
                longestPluralString = candidatePluralString
                changed = True

            if changed:
                longStringCatalogMessage.msgstr = [longestSingularString, longestPluralString]

    longStringCatalog.set_encoding("utf-8")
    longStringCatalog.sync()
Example #21
0
def diff_pairs (pspecs, merge,
                colorize=False, wrem=True, wadd=True, shdr=False, noobs=False,
                quiet=False):
    """
    Diff pairs of catalog files into one embedded-diff catalog.

    Every element of C{pspecs} is a pair C{(fpaths, vpaths)}, each of them
    a pair itself. The C{fpaths} are the paths which get opened as catalogs
    (and compared byte-wise up front), while the C{vpaths} are only handed
    to progress reporting and to header diffing.

    @param pspecs: pairs of (paths to open, paths to report)
    @param merge: passed through to C{diff_cats}
    @param colorize: passed through to C{diff_cats} and C{diff_hdrs}
    @param wrem: passed through to C{diff_cats}
    @param wadd: passed through to C{diff_cats}
    @param shdr: if true, diffed headers are not added to the returned count
    @param noobs: passed through to C{diff_cats}
    @param quiet: if true, no file progress is set up

    @returns: the diff catalog and the number of diffed entries
    @rtype: (catalog, int)
    """

    # Create diffs of messages.
    # Note: Headers will be collected and diffed after all messages,
    # to be able to check if any decoration to their message keys is needed.
    wrappings = {}
    ecat = Catalog("", create=True, monitored=False)
    hspecs = []
    ndiffed = 0
    update_progress = None
    # Progress is only tracked when there is more than one pair to diff.
    if len(pspecs) > 1 and not quiet:
        update_progress = init_file_progress([vp[1] for fp, vp in pspecs],
                            addfmt=t_("@info:progress", "Diffing: %(file)s"))
    for fpaths, vpaths in pspecs:
        upprogf = None
        if update_progress:
            upprogf = lambda: update_progress(vpaths[1])
            upprogf()
        # Quick check if files are binary equal.
        if fpaths[0] and fpaths[1] and filecmp.cmp(*fpaths):
            continue
        cats = []
        for fpath in fpaths:
            try:
                cats.append(Catalog(fpath, create=True, monitored=False))
            except Exception:
                # Only genuine errors are reported as parse failures;
                # KeyboardInterrupt and SystemExit propagate untouched.
                # NOTE(review): error_wcl is presumably terminal; if it ever
                # returns, cats ends up short and the indexing below fails.
                error_wcl(_("@info",
                            "Cannot parse catalog '%(file)s'.",
                            file=fpath), norem=[fpath])
        # Position in the diff catalog at which this pair's messages start;
        # the pair's header diff is inserted there later.
        tpos = len(ecat)
        cndiffed = diff_cats(cats[0], cats[1], ecat,
                             merge, colorize, wrem, wadd, noobs, upprogf)
        hspecs.append(([not x.created() and x.header or None
                        for x in cats], vpaths, tpos, cndiffed))
        ndiffed += cndiffed
        # Collect and count wrapping policy used for to-catalog.
        wrapping = cats[1].wrapping()
        wrappings[wrapping] = wrappings.get(wrapping, 0) + 1
    if update_progress:
        update_progress()

    # Find appropriate length of context for header messages.
    hmsgctxt = get_msgctxt_for_headers(ecat)
    init_ediff_header(ecat.header, hmsgctxt=hmsgctxt)

    # Create diffs of headers.
    # If some of the messages were diffed,
    # header must be added even if there is no difference.
    incpos = 0
    for hdrs, vpaths, pos, cndiffed in hspecs:
        ehmsg, anydiff = diff_hdrs(hdrs[0], hdrs[1], vpaths[0], vpaths[1],
                                   hmsgctxt, ecat, colorize)
        if anydiff or cndiffed:
            # Every header inserted so far shifts later positions by one.
            ecat.add(ehmsg, pos + incpos)
            incpos += 1
    # Add diffed headers to total count only if header stripping not in effect.
    if not shdr:
        ndiffed += incpos

    # Set the most used wrapping policy for the ediff catalog.
    if wrappings:
        wrapping = sorted(wrappings.items(), key=lambda x: x[1])[-1][0]
        ecat.set_wrapping(wrapping)
        if wrapping is not None:
            ecat.header.set_field(u"X-Wrapping", u", ".join(wrapping))

    return ecat, ndiffed
Example #22
0
def generateTemplatesForMessagesFile(messagesFilePath):
    """
    Generate the translation templates described by a messages JSON file.

    The file must contain a JSON list of template settings. Each entry is
    read for the keys "output", "project", "copyrightHolder" and "rules",
    and optionally "skip" and "inputRoot". Each rule is read for the keys
    "extractor" and "filemasks", and optionally "skip" and "options".
    Entries and rules whose "skip" value is "yes" are ignored.

    Output and input root paths are resolved relative to the directory
    of the messages file. One template catalog is written per entry.
    """

    with open(messagesFilePath, 'r') as fileObject:
        settings = json.load(fileObject)

    rootPath = os.path.dirname(messagesFilePath)

    for templateSettings in settings:
        if templateSettings.get("skip") == "yes":
            continue

        inputRootPath = rootPath
        if "inputRoot" in templateSettings:
            inputRootPath = os.path.join(rootPath, templateSettings["inputRoot"])

        template = Catalog(os.path.join(rootPath, templateSettings["output"]), create=True, truncate=True)
        h = template.update_header(templateSettings["project"], "Translation template for %project.", "Copyright © "+"2014"+" "+templateSettings["copyrightHolder"], "This file is distributed under the same license as the %project project.", plforms="nplurals=2; plural=(n != 1);")
        # These header fields are dropped from the generated template.
        h.remove_field("Report-Msgid-Bugs-To")
        h.remove_field("Last-Translator")
        h.remove_field("Language-Team")
        h.remove_field("Language")
        h.author = Monlist()

        for rule in templateSettings["rules"]:
            if rule.get("skip") == "yes":
                continue

            options = rule.get("options", {})
            # The extractor class is looked up by name in extractors.extractors.
            extractorModule = __import__("extractors.extractors", {}, {}, [rule["extractor"]])
            extractorClass = getattr(extractorModule, rule["extractor"])
            extractor = extractorClass(inputRootPath, rule["filemasks"], options)
            formatFlag = options.get("format")
            for message, plural, context, location, comments in extractor.run():
                # The format flag is only set on messages containing a "%".
                flags = [formatFlag] if formatFlag and "%" in message else None
                msg = Message({"msgid": message, "msgid_plural": plural, "msgctxt": context, "auto_comment": comments, "flag": flags, "source": [location]})
                existingMessage = template.get(msg)
                if existingMessage:
                    # Message already in the template: record one more location.
                    existingMessage.source.append(Monpair(location))
                else:
                    template.add(msg)

        template.set_encoding("utf-8")
        template.sync(fitplural=True)
        print(u"Generated \"{}\" with {} messages.".format(templateSettings["output"], len(template)))
Example #23
0
File: merge.py Project: KDE/pology
def merge_pofile (catpath, tplpath,
                  outpath=None, update=False, wrapping=None,
                  fuzzymatch=True, cmppaths=None, quiet=False,
                  fuzzex=False, minwnex=0, minasfz=0.0, refuzzy=False,
                  getcat=False, monitored=True,
                  ignpotdate=False, abort=False):
    """
    Merge a PO file with the PO template.

    This function is a frontend to C{msgmerge} command,
    providing some additional features on demand.
    The command is started directly with an argument list (no shell),
    so paths are passed to it verbatim.

    This function is usually used in one of four ways:
      - create a new PO file: the path is given with C{outpath} parameter
      - update the original PO file: C{update} is set to C{True}
            and C{outpath} is not given
      - only get merged catalog object: C{getcat} is set to C{True} and
            neither C{outpath} nor C{update} are issued;
            no PO file gets created or modified (except for temporaries,
            which are cleaned up on return)
      - check whether merging is possible: neither of C{outpath},
            C{update}, or C{getcat} are issued;
            if C{True} is returned, merging succeeded.

    The return value differs based on C{getcat}.
    If C{getcat} is C{False}, the return value is C{True} if merging
    succeeded (C{msgmerge} exited normally), and C{False} if not.
    If C{getcat} is C{True}, a catalog object on the merged catalog
    is returned if the merging succeeded, and C{None} if not.
    However, if C{abort} is set to C{True}, if C{msgmerge} fails
    the program aborts with an error message.

    When catalog object is returned, its sync state is undefined.
    If it needs to be in sync before use, it should be synced manually.

    @param catpath: path to PO file to merge
    @type catpath: string
    @param tplpath: path to PO template
    @type tplpath: string
    @param outpath: path to output PO file
    @type outpath: string
    @param update: whether to update the PO file in place
    @type update: bool
    @param wrapping: the wrapping policy (see the parameter of the same name
        to L{catalog constructor<catalog.Catalog>})
    @type wrapping: sequence of strings
    @param fuzzymatch: whether to perform fuzzy matching
    @type fuzzymatch: bool
    @param cmppaths: paths to compendium files to be used on merging
    @type cmppaths: sequence of strings
    @param quiet: whether C{msgmerge} should operate quietly
    @type quiet: bool
    @param fuzzex: whether to fuzzy exact matches from compendia
    @type fuzzex: bool
    @param minwnex: minimal number of words in the original in exact match
        from compendia to not fuzzy the message (a very large number
        approximates C{fuzzex} set to C{True}).
    @type minwnex: int
    @param minasfz: minimal adjusted similarity for a fuzzy match to be kept;
        fuzzy messages with previous fields for which C{editprob} between
        the previous and the current C{msgid} is below this value
        are cleared after merging (no effect when not above zero)
    @type minasfz: float
    @param refuzzy: whether to "rebase" fuzzy messages, i.e. remove prior
        to merging those fuzzy messages whose translated counterparts
        (determined by previous fields) still exist in the catalog.
        This puts possibly newer translation into such messages,
        or even leads to a better fuzzy match.
    @type refuzzy: bool
    @param getcat: whether to return catalog object on merged file
    @type getcat: bool
    @param monitored: if C{getcat} is in effect, whether to open catalog
        in monitoring mode (like the parameter to catalog constructor)
    @type monitored: bool
    @param ignpotdate: whether to ignore changed C{POT-Creation-Date}
        if there were no other changes, resetting it to original value
    @type ignpotdate: bool
    @param abort: whether to abort execution if C{msgmerge} fails
    @type abort: bool

    @returns: whether merging succeeded, or catalog object
    @rtype: bool or L{Catalog<catalog.Catalog>} or C{None}
    """

    # Imported locally so that the block does not depend on
    # the module-level import list.
    import subprocess

    if wrapping is not None:
        wrap = "basic" in wrapping
        otherwrap = set(wrapping).difference(["basic"])
    else:
        wrap = True
        otherwrap = False

    # Store original catalog if change in template creation date
    # alone should be ignored, for check at the end.
    if ignpotdate:
        orig_cat = Catalog(catpath, monitored=False)

    # Determine which special operations are to be done.
    correct_exact_matches = cmppaths and (fuzzex or minwnex > 0)
    correct_fuzzy_matches = minasfz > 0.0
    rebase_existing_fuzzies = refuzzy and fuzzymatch

    # Pre-process catalog if necessary.
    if correct_exact_matches or rebase_existing_fuzzies:
        may_modify = rebase_existing_fuzzies
        cat = Catalog(catpath, monitored=may_modify)

        # In case compendium is being used,
        # collect keys of all non-translated messages,
        # to later check which exact matches need to be fuzzied.
        # New non-translated messages can come from the template,
        # make sure these too are taken into account.
        if correct_exact_matches:
            nontrkeys = set()
            trkeys = set()
            for msg in cat:
                if not msg.translated:
                    nontrkeys.add(msg.key)
                else:
                    trkeys.add(msg.key)
            tcat = Catalog(tplpath, monitored=False)
            for msg in tcat:
                if msg.key not in trkeys:
                    nontrkeys.add(msg.key)

        # If requested, remove all untranslated messages,
        # and replace every fuzzy message which has previous fields
        # with a dummy previous translated message
        # (unless such message already exists in the catalog).
        # This way, untranslated messages will get fuzzy matched again,
        # and fuzzy messages may get updated translation.
        # However, do not do this for messages where a previous translated
        # message does already exist in the catalog, is fuzzy, and
        # does not have previous fields, since then that one will be
        # fuzzy matched and propagate its lack of previous fields.
        if rebase_existing_fuzzies:
            rebase_dummy_messages = []
            for msg in cat:
                if msg.untranslated:
                    cat.remove_on_sync(msg)
                elif msg.fuzzy and msg.msgid_previous:
                    omsgs = cat.select_by_key(msg.msgctxt_previous,
                                              msg.msgid_previous)
                    if (   not omsgs
                        or not omsgs[0].fuzzy
                        or omsgs[0].msgid_previous is not None
                    ):
                        cat.remove_on_sync(msg)
                    if not omsgs:
                        dmsg = Message()
                        dmsg.msgctxt = msg.msgctxt_previous
                        dmsg.msgid = msg.msgid_previous
                        dmsg.msgid_plural = msg.msgid_plural_previous
                        dmsg.msgstr = msg.msgstr
                        cat.add_last(dmsg)
                        rebase_dummy_messages.append(dmsg)

        if may_modify:
            cat.sync()

    # Prepare temporary file if output path not given and not in update mode.
    if not outpath and not update:
        # The file object must stay referenced until the function returns.
        tmpf = NamedTemporaryFile(prefix="pology-merged-", suffix=".po")
        outpath = tmpf.name

    # Merge.
    # Options are collected as separate arguments rather than
    # as a shell command string, so that paths containing spaces
    # or shell metacharacters reach msgmerge unchanged.
    opts = []
    if not update:
        opts.extend(["--output-file", outpath])
    else:
        opts.append("--update")
        opts.extend(["--backup", "none"])
    if fuzzymatch:
        opts.append("--previous")
    else:
        opts.append("--no-fuzzy-matching")
    if not wrap:
        opts.append("--no-wrap")
    for cmppath in (cmppaths or []):
        if not os.path.isfile(cmppath):
            raise PologyError(
                _("@info",
                  "Compendium does not exist at '%(path)s'.",
                  path=cmppath))
        opts.extend(["--compendium", cmppath])
    if quiet:
        opts.append("--quiet")
    cmdargs = ["msgmerge"] + opts + [catpath, tplpath]
    try:
        mrgres = subprocess.call([unicode_to_str(x) for x in cmdargs])
    except OSError:
        # msgmerge could not be started at all (e.g. not installed);
        # handle it in the same way as a failed merge.
        mrgres = -1
    if mrgres != 0:
        if abort:
            raise PologyError(
                _("@info",
                  "Cannot merge PO file '%(file1)s' with template '%(file2)s'.",
                  file1=catpath, file2=tplpath))
        return None if getcat else False

    # If the catalog had only header and no messages,
    # msgmerge will not write out anything.
    # In such case, just copy the initial file to output path.
    if outpath and not os.path.isfile(outpath):
        shutil.copyfile(catpath, outpath)
    # If both the output path has been given and update requested,
    # copy the output file over the initial file.
    # NOTE(review): in update mode msgmerge is not given --output-file,
    # so a pre-existing file at outpath would overwrite the freshly
    # merged catpath here -- confirm this combination is intended.
    if update and outpath and catpath != outpath:
        shutil.copyfile(outpath, catpath)

    # Post-process merged catalog if necessary.
    if (   getcat or otherwrap or correct_exact_matches
        or correct_fuzzy_matches or ignpotdate or rebase_existing_fuzzies
    ):
        # If fine wrapping requested and catalog should not be returned,
        # everything has to be reformatted, so no need to monitor the catalog.
        catpath1 = outpath or catpath
        monitored1 = monitored if getcat else (not otherwrap)
        cat = Catalog(catpath1, monitored=monitored1, wrapping=wrapping)

        # In case compendium is being used,
        # make fuzzy exact matches which do not pass the word limit.
        if correct_exact_matches:
            acc = cat.accelerator()
            for msg in cat:
                if (    msg.key in nontrkeys and msg.translated
                    and (   fuzzex
                         or len(proper_words(msg.msgid, accels=acc)) < minwnex)
                ):
                    msg.fuzzy = True

        # Eliminate fuzzy matches not passing the adjusted similarity limit.
        if correct_fuzzy_matches:
            for msg in cat:
                if msg.fuzzy and msg.msgid_previous is not None:
                    if editprob(msg.msgid_previous, msg.msgid) < minasfz:
                        msg.clear()

        # Revert template creation date change if it was the only change.
        if ignpotdate:
            fname = "POT-Creation-Date"
            orig_potdate = orig_cat.header.get_field_value(fname)
            new_potdate = cat.header.get_field_value(fname)
            # Try with the original date; if the catalogs still differ,
            # something else changed too, so restore the new date.
            cat.header.replace_field_value(fname, orig_potdate)
            if cat != orig_cat:
                cat.header.replace_field_value(fname, new_potdate)

        # Remove dummy messages added for rebasing of fuzzy messages
        # that were obsoleted instead of promoted to fuzzy.
        if rebase_existing_fuzzies:
            for dmsg in rebase_dummy_messages:
                if dmsg in cat and cat[dmsg].obsolete:
                    cat.remove_on_sync(dmsg)

        if not getcat:
            cat.sync(force=otherwrap)

    return cat if getcat else True
Example #24
0
def build_splitting_triplets(emsgs, cat, options):
    """
    Build splitting triplets from embedded diff messages and a catalog.

    The old and new sides of every message in C{emsgs} (as resolved by
    C{resolve_diff_pair}) are gathered into two scratch catalogs, which
    are then diffed against C{cat}: old-to-current and current-to-new.
    For every message key present in both diffs, one tuple is produced:
    C{(old, old_s, new, new_s, current, current_s_vs_old, current_s_vs_new)},
    where the C{*_s} items are the third and fourth values returned
    by C{resolve_diff_pair}.

    If C{options.do_merge} is set, the scratch catalogs are first written
    to temporary files, and the flag is also passed on to C{diff_cats}.

    @returns: list of 7-tuples as described above
    """

    def new_scratch_catalog():
        # Empty, unmonitored catalog not bound to any file.
        return Catalog("", create=True, monitored=False)

    def collect_doublets(ecat, keyed_by_second):
        # Map message key -> [first, first_s, second, second_s] for each
        # diff message, keyed by the side which is the current message.
        table = {}
        for ediff in ecat:
            first, second, first_s, second_s = resolve_diff_pair(ediff)
            current = second if keyed_by_second else first
            table[current.key] = [first, first_s, second, second_s]
        return table

    # Create catalogs of old and new messages.
    old_cat = new_scratch_catalog()
    new_cat = new_scratch_catalog()
    for emsg in emsgs:
        old_msg, new_msg, _old_s, _new_s = resolve_diff_pair(emsg)
        if old_msg:
            old_cat.add_last(old_msg)
        if new_msg:
            new_cat.add_last(new_msg)
    # Make headers same, to avoid any diffs there.
    old_cat.header = cat.header
    new_cat.header = cat.header

    # Write created catalogs to disk if
    # msgmerge may be used on files during diffing.
    if options.do_merge:
        # Temporary files are referenced from here so that they
        # are not garbage collected until the function returns.
        live_tmpfiles = []
        for scratch, tag in ((old_cat, "1"), (new_cat, "2")):
            handle = NamedTemporaryFile(prefix="poepatch-split-%s-" % tag,
                                        suffix=".po")
            live_tmpfiles.append(handle)
            scratch.filename = handle.name
            scratch.sync(force=True)

    # Create the old-to-current and current-to-new diffs.
    ediff_old_cur = new_scratch_catalog()
    diff_cats(old_cat, cat, ediff_old_cur, options.do_merge,
              wadd=False, wrem=False)
    ediff_cur_new = new_scratch_catalog()
    diff_cats(cat, new_cat, ediff_cur_new, options.do_merge,
              wadd=False, wrem=False)

    # Mine splitting triplets out of diffs.
    by_key_old_cur = collect_doublets(ediff_old_cur, True)
    by_key_cur_new = collect_doublets(ediff_cur_new, False)
    triplets = []
    for key in set(by_key_old_cur).intersection(by_key_cur_new):
        old_t, old_ts, _cur_unused, cur_ts_vs_old = by_key_old_cur[key]
        # The current message is taken from the current-to-new diff.
        cur_t, cur_ts_vs_new, new_t, new_ts = by_key_cur_new[key]
        triplets.append((old_t, old_ts, new_t, new_ts,
                         cur_t, cur_ts_vs_old, cur_ts_vs_new))

    return triplets
Example #25
0
File: stats.py Project: KDE/pology
    def finalize(self):
        """
        Aggregate the collected counts and output the requested statistics.

        Steps, in order:
          - if template subdirs are set, process unmatched POT files and
            map their directories to translation directories
          - if C{self.p.mincomp} is set, drop catalogs below that
            translated ratio from C{self.counts} and
            C{self.incomplete_catalogs}
          - sum up counts overall, and by directory if requested
          - output the statistics in every requested form
          - output the table of incomplete catalogs and/or write their
            names to C{self.p.incompfile}, if requested
          - report the modifiers which were in effect for counting

        Sets C{self.inline} and may replace C{self.counts} and
        C{self.incomplete_catalogs}; returns nothing.
        """

        # If template correspondence requested, handle POTs without POs.
        if self.template_subdirs:
            # Collect all catalogs in template subdirs.
            tpaths = collect_catalogs(self.template_subdirs)
            tpaths = [x for x in tpaths if self.p.is_cat_included(x)]
            # Filter to have only POTs remain.
            tpaths = [x for x in tpaths if x.endswith(".pot")]
            # Filter to leave out matched templates.
            tpaths = [x for x in tpaths if x not in self.matched_templates]
            # Add stats on all unmatched templates.
            for tpath in tpaths:
                cat = Catalog(tpath, monitored=False)
                self.process_header(cat.header, cat)
                for msg in cat:
                    self.process(msg, cat)
            # Map template to translation subdirs.
            for tpath in tpaths:
                tsubdir = os.path.dirname(tpath)
                subdir = tsubdir.replace(self.tspec_repl, self.tspec_srch, 1)
                self.mapped_template_subdirs[tsubdir] = subdir

        # If completeness limit in effect, eliminate catalogs not passing it.
        if self.p.mincomp is not None:
            ncounts = {}
            ninccats = {}
            for filename, count in self.counts.items():
                # Guard against division by zero for empty catalogs.
                cr = float(count["trn"][0]) / (count["tot"][0] or 1)
                if cr >= self.p.mincomp:
                    ncounts[filename] = count
                    inccat = self.incomplete_catalogs.get(filename)
                    if inccat is not None:
                        ninccats[filename] = inccat
            self.counts = ncounts
            self.incomplete_catalogs = ninccats

        # Assemble sets of total counts by requested divisions.
        count_overall = self._count_zero()
        counts_bydir = {}
        filenames_bydir = {}
        for filename, count in self.counts.items():

            count_overall = self._count_sum(count_overall, count)

            if self.p.bydir:
                cdir = os.path.dirname(filename)
                if cdir in self.mapped_template_subdirs:
                    # Pretend templates-only are within language subdir.
                    cdir = self.mapped_template_subdirs[cdir]
                if cdir not in counts_bydir:
                    counts_bydir[cdir] = self._count_zero()
                    filenames_bydir[cdir] = []
                counts_bydir[cdir] = self._count_sum(counts_bydir[cdir], count)
                filenames_bydir[cdir].append(filename)

        # Arrange sets into ordered list with titles.
        # Each entry is (title, count, whether it sums up other entries).
        counts = []
        if self.p.bydir:
            for cdir in sorted(counts_bydir):
                if self.p.byfile:
                    self._sort_equiv_filenames(filenames_bydir[cdir])
                    for filename in filenames_bydir[cdir]:
                        counts.append((filename, self.counts[filename], False))
                counts.append(("%s/" % cdir, counts_bydir[cdir], False))
            counts.append((_("@item:intable sum of all other entries",
                             "(overall)"), count_overall, True))

        elif self.p.byfile:
            # A real list is needed, since it is handed over for sorting.
            filenames = list(self.counts.keys())
            self._sort_equiv_filenames(filenames)
            for filename in filenames:
                counts.append((filename, self.counts[filename], False))
            counts.append((_("@item:intable sum of all other entries",
                             "(overall)"), count_overall, True))

        else:
            counts.append((None, count_overall, False))

        # Indicate conspicuously up front modifiers to counting.
        modstrs = []
        if self.p.branch:
            fmtbranches = format_item_list(self.p.branch)
            modstrs.append(
                _("@item:intext",
                  "branches (%(branchlist)s)",
                  branchlist=fmtbranches))
        if self.p.maxwords is not None and self.p.minwords is None:
            modstrs.append(
                n_("@item:intext",
                   "at most %(num)d word",
                   "at most %(num)d words",
                   num=self.p.maxwords))
        if self.p.minwords is not None and self.p.maxwords is None:
            modstrs.append(
                n_("@item:intext",
                   "at least %(num)d word",
                   "at least %(num)d words",
                   num=self.p.minwords))
        if self.p.minwords is not None and self.p.maxwords is not None:
            modstrs.append(
                n_("@item:intext",
                   "from %(num1)d to %(num)d word",
                   "from %(num1)d to %(num)d words",
                   num1=self.p.minwords,
                   num=self.p.maxwords))
        if self.p.lspan:
            modstrs.append(
                _("@item:intext", "line span %(span)s", span=self.p.lspan))
        if self.p.espan:
            modstrs.append(
                _("@item:intext", "entry span %(span)s", span=self.p.espan))
        if self.p.ondiff:
            modstrs.append(_("@item:intext", "scaled fuzzy counts"))

        # Should titles be output in-line or on separate lines.
        self.inline = False
        maxtitlecw = 0
        if (not self.p.wbar or not self.p.msgbar
                or not self.p.msgfmt) and (not self.p.table):
            for title, count, summed in counts:
                if title is not None:
                    self.inline = True
                    titlecw = len(title)
                    if maxtitlecw < titlecw:
                        maxtitlecw = titlecw

        # Output statistics in requested forms.
        for title, count, summed in counts:
            # Output the title if defined.
            if title is not None:
                if self.inline:
                    ntitle = (("%%-%ds" % maxtitlecw) % title)
                else:
                    ntitle = title
                # Must color after padding, to avoid it seeing the colors.
                ntitle = _("@title", "<bold>%(title)s</bold>", title=ntitle)
                if self.inline:
                    report(ntitle + " ", newline=False)
                else:
                    report(ntitle)

            if self.p.table:
                self._tabular_stats(counts, title, count)
            if self.p.msgbar:
                self._msg_bar_stats(counts, title, count, summed)
            if self.p.wbar:
                self._w_bar_stats(counts, title, count, summed)
            if self.p.msgfmt:
                self._msg_simple_stats(title, count, summed)

        # Output the table of catalogs which are not fully translated,
        # if requested.
        if self.p.incomplete and self.incomplete_catalogs:
            filenames = list(self.incomplete_catalogs.keys())
            self._sort_equiv_filenames(filenames)
            data = []
            # Column of catalog filenames.
            data.append(filenames)
            # Columns of fuzzy, untranslated, and the two added,
            # first from index 0 and then from index 1 of the counts
            # (messages and words, going by the column titles below).
            data.append([self.counts[x]["fuz"][0] for x in filenames])
            data.append([self.counts[x]["unt"][0] for x in filenames])
            data.append([x + y for x, y in zip(data[1], data[2])])
            data.append([self.counts[x]["fuz"][1] for x in filenames])
            data.append([self.counts[x]["unt"][1] for x in filenames])
            data.append([x + y for x, y in zip(data[4], data[5])])
            # Column names and formats.
            coln = [
                _("@title:column", "catalog"),
                _("@title:column fuzzy messages", "msg/f"),
                _("@title:column untranslated messages", "msg/u"),
                _("@title:column fuzzy and untranslated messages", "msg/f+u"),
                _("@title:column words in fuzzy messages", "w/f"),
                _("@title:column words in untranslated messages", "w/u"),
                _("@title:column words in fuzzy and untranslated messages",
                  "w/f+u")
            ]
            maxfl = max(len(x) for x in filenames)
            dfmt = ["%%-%ds" % maxfl, "%d", "%d", "%d", "%d", "%d", "%d"]
            # Output.
            report("-")
            report(
                tabulate(data,
                         coln=coln,
                         dfmt=dfmt,
                         space="   ",
                         none=u"-",
                         colorize=True))

        # Write file names of catalogs which are not fully translated
        # into a file, if requested.
        if self.p.incompfile:
            filenames = sorted(self.incomplete_catalogs)
            cmdlenc = locale.getpreferredencoding()
            # The file gets closed even if writing fails.
            with codecs.open(self.p.incompfile, "w", cmdlenc) as ofl:
                ofl.writelines([x + "\n" for x in filenames])

        if modstrs:
            report(
                _("@item:intable",
                  "modifiers: %(modlist)s",
                  modlist=format_item_list(modstrs)))