def icat_w(cat, icat_pack):
    """
    Return the inverted counterpart of catalog C{cat}, building it lazily.

    The inverted catalog is cached in C{icat_pack[0]}: it is constructed
    on the first call (reporting progress per message) and the cached
    instance is returned on every subsequent call.
    """
    # Fast path: inversion already computed earlier.
    if icat_pack[0] is not None:
        return icat_pack[0]
    inv_cat = Catalog("", create=True, monitored=False)
    for msg in cat:
        upprogf()
        inv_msg = _msg_invert_cp(msg)
        # Skip duplicates produced by the inversion.
        if inv_msg not in inv_cat:
            inv_cat.add_last(inv_msg)
    icat_pack[0] = inv_cat
    return inv_cat
def build_splitting_triplets(emsgs, cat, options):
    """
    Construct splitting triplets for the given embedded-diff messages
    against the current catalog C{cat}.

    Old and new versions of each diffed message are separated into two
    scratch catalogs, which are then each diffed against the current
    catalog; messages appearing in both resulting diffs yield one
    triplet each.
    """
    # Separate embedded diffs into catalogs of old and new messages.
    old_cat = Catalog("", create=True, monitored=False)
    new_cat = Catalog("", create=True, monitored=False)
    for emsg in emsgs:
        omsg, nmsg, _, _ = resolve_diff_pair(emsg)
        if omsg:
            old_cat.add_last(omsg)
        if nmsg:
            new_cat.add_last(nmsg)

    # Equalize headers with the current catalog, so that
    # no spurious differences show up there.
    old_cat.header = cat.header
    new_cat.header = cat.header

    # If msgmerge may be run while diffing, the scratch catalogs
    # must actually exist on disk.
    if options.do_merge:
        tmpfs = []  # keep temporaries alive until the function returns
        for tcat, tsuff in ((old_cat, "1"), (new_cat, "2")):
            tmpf = NamedTemporaryFile(prefix="poepatch-split-%s-" % tsuff,
                                      suffix=".po")
            tmpfs.append(tmpf)
            tcat.filename = tmpf.name
            tcat.sync(force=True)

    # Diff old against current, and current against new.
    ecat_oc = Catalog("", create=True, monitored=False)
    diff_cats(old_cat, cat, ecat_oc, options.do_merge, wadd=False, wrem=False)
    ecat_cn = Catalog("", create=True, monitored=False)
    diff_cats(cat, new_cat, ecat_cn, options.do_merge, wadd=False, wrem=False)

    # Collect doublets from each diff, keyed by the current message.
    doublets_oc = {}
    for emsg in ecat_oc:
        m1_t, m_t, m1_ts, m_ts1 = resolve_diff_pair(emsg)
        doublets_oc[m_t.key] = (m1_t, m1_ts, m_t, m_ts1)
    doublets_cn = {}
    for emsg in ecat_cn:
        m_t, m2_t, m_ts2, m2_ts = resolve_diff_pair(emsg)
        doublets_cn[m_t.key] = (m_t, m_ts2, m2_t, m2_ts)

    # Messages present in both diffs produce splitting triplets.
    striplets = []
    for key in set(doublets_oc).intersection(doublets_cn):
        m1_t, m1_ts, m_t, m_ts1 = doublets_oc[key]
        m_t, m_ts2, m2_t, m2_ts = doublets_cn[key]
        striplets.append((m1_t, m1_ts, m2_t, m2_ts, m_t, m_ts1, m_ts2))

    return striplets
def merge_pofile (catpath, tplpath,
                  outpath=None, update=False, wrapping=None,
                  fuzzymatch=True, cmppaths=None, quiet=False,
                  fuzzex=False, minwnex=0, minasfz=0.0, refuzzy=False,
                  getcat=False, monitored=True,
                  ignpotdate=False, abort=False):
    """
    Merge a PO file with the PO template.

    This function is a frontend to C{msgmerge} command,
    providing some additional features on demand.

    This function is usually used in one of three ways:
      - create a new PO file: the path is given with C{outpath} parameter
      - update the original PO file: C{update} is set to C{True}
        and C{outpath} is not given
      - only get merged catalog object: C{getcat} is set to C{True} and
        neither C{outpath} nor C{update} are issued;
        no PO file gets created or modified (except for temporaries,
        which are cleaned up on return)
      - check whether merging is possible: neither of C{outpath},
        C{update}, or C{getcat} are issued;
        if C{True} is returned, merging succedded.

    The return value differs based on C{getcat}.
    If C{getcat} is C{False}, the return value is C{True} if merging
    succedded (C{msgmerge} exited normally), and C{False} if not.
    If C{getcat} is C{True}, a catalog object on the merged catalog
    is returned if the merging succedded, and C{None} if not.

    However, if C{abort} is set to C{True}, if C{msgmerge} fails
    the program aborts with an error message.

    When catalog object is returned, its sync state is undefined.
    If it needs to be in sync before use, it should be synced manually.

    @param catpath: path to PO file to merge
    @type catpath: string
    @param tplpath: path to PO template
    @type tplpath: string
    @param outpath: path to output PO file
    @type outpath: string
    @param update: whether to update the PO file in place
    @type update: bool
    @param wrapping: the wrapping policy (see the parameter of the same name
        to L{catalog constructor<catalog.Catalog>})
    @type wrapping: sequence of strings
    @param fuzzymatch: whether to perform fuzzy matching
    @type fuzzymatch: bool
    @param cmppaths: paths to compendium files to be used on merging
    @type cmppaths: sequence of strings
    @param quiet: whether C{msgmerge} should operate quietly
    @type quiet: bool
    @param fuzzex: whether to fuzzy exact matches from compendia
    @type fuzzex: bool
    @param minwnex: minimal number of words in the original in exact match
        from compendia to not fuzzy the message (a very large number
        approximates C{fuzzex} set to C{True}).
    @type minwnex: int
    @param minasfz: minimal adjusted similarity (as computed by C{editprob}
        between previous and current original text) for a fuzzy match
        to be retained after merging; fuzzy messages below this limit
        are cleared. Zero disables the check.
    @type minasfz: float
    @param refuzzy: whether to "rebase" fuzzy messages, i.e. remove prior
        to merging those fuzzy messages whose translated counterparts
        (determined by previous fields) still exist in the catalog.
        This puts possibly newer translation into such messages,
        or even leads to a better fuzzy match.
    @type refuzzy: bool
    @param getcat: whether to return catalog object on merged file
    @type getcat: bool
    @param monitored: if C{getcat} is in effect, whether to open catalog
        in monitoring mode (like the parameter to catalog constructor)
    @type monitored: bool
    @param ignpotdate: whether to ignore changed C{POT-Creation-Date}
        if there were no other changes, resetting it to original value
    @type ignpotdate: bool
    @param abort: whether to abort execution if C{msgmerge} fails
    @type abort: bool

    @returns: whether merging succedded, or catalog object
    @rtype: bool or L{Catalog<catalog.Catalog>} or C{None}
    """

    # Resolve wrapping policy: "basic" maps to msgmerge's own wrapping,
    # anything else ("fine" etc.) has to be applied by rewriting afterwards.
    if wrapping is not None:
        wrap = "basic" in wrapping
        otherwrap = set(wrapping).difference(["basic"])
    else:
        wrap = True
        otherwrap = False

    # Store original catalog if change in template creation date
    # alone should be ignored, for check at the end.
    if ignpotdate:
        orig_cat = Catalog(catpath, monitored=False)

    # Determine which special operations are to be done.
    correct_exact_matches = cmppaths and (fuzzex or minwnex > 0)
    correct_fuzzy_matches = minasfz > 0.0
    rebase_existing_fuzzies = refuzzy and fuzzymatch

    # Pre-process catalog if necessary.
    if correct_exact_matches or rebase_existing_fuzzies:
        # Catalog needs to be writable only if rebasing may remove messages.
        may_modify = rebase_existing_fuzzies
        cat = Catalog(catpath, monitored=may_modify)

        # In case compendium is being used,
        # collect keys of all non-translated messages,
        # to later check which exact matches need to be fuzzied.
        # New non-translated messages can come from the template,
        # make sure these too are taken into account.
        if correct_exact_matches:
            nontrkeys = set()
            trkeys = set()
            for msg in cat:
                if not msg.translated:
                    nontrkeys.add(msg.key)
                else:
                    trkeys.add(msg.key)
            tcat = Catalog(tplpath, monitored=False)
            for msg in tcat:
                if msg.key not in trkeys:
                    nontrkeys.add(msg.key)

        # If requested, remove all untranslated messages,
        # and replace every fuzzy message which has previous fields
        # with a dummy previous translated message
        # (unless such message already exists in the catalog).
        # This way, untranslated messages will get fuzzy matched again,
        # and fuzzy messages may get updated translation.
        # However, do not do this for messages where a previous translated
        # message does already exist in the catalog, is fuzzy, and
        # does not have previous fields, since then that one will be
        # fuzzy matched and propagate its lack of previous fields.
        if rebase_existing_fuzzies:
            rebase_dummy_messages = []
            for msg in cat:
                if msg.untranslated:
                    cat.remove_on_sync(msg)
                elif msg.fuzzy and msg.msgid_previous:
                    omsgs = cat.select_by_key(msg.msgctxt_previous,
                                              msg.msgid_previous)
                    if (   not omsgs
                        or not omsgs[0].fuzzy
                        or omsgs[0].msgid_previous is not None
                    ):
                        cat.remove_on_sync(msg)
                    if not omsgs:
                        # Construct the dummy previous message carrying
                        # this message's translation, so that fuzzy
                        # matching can pick it up again.
                        dmsg = Message()
                        dmsg.msgctxt = msg.msgctxt_previous
                        dmsg.msgid = msg.msgid_previous
                        dmsg.msgid_plural = msg.msgid_plural_previous
                        dmsg.msgstr = msg.msgstr
                        cat.add_last(dmsg)
                        rebase_dummy_messages.append(dmsg)

        if may_modify:
            cat.sync()

    # Prepare temporary file if output path not given and not in update mode.
    if not outpath and not update:
        tmpf = NamedTemporaryFile(prefix="pology-merged-", suffix=".po")
        outpath = tmpf.name

    # Merge.
    # NOTE(review): paths are interpolated into the shell command line
    # unquoted; paths containing spaces or shell metacharacters would
    # break the invocation — confirm whether callers guarantee safe paths.
    opts = []
    if not update:
        opts.append("--output-file %s" % outpath)
    else:
        opts.append("--update")
        opts.append("--backup none")
    if fuzzymatch:
        opts.append("--previous")
    else:
        opts.append("--no-fuzzy-matching")
    if not wrap:
        opts.append("--no-wrap")
    for cmppath in (cmppaths or []):
        if not os.path.isfile(cmppath):
            raise PologyError(
                _("@info",
                  "Compendium does not exist at '%(path)s'.",
                  path=cmppath))
        opts.append("--compendium %s" % cmppath)
    if quiet:
        opts.append("--quiet")
    fmtopts = " ".join(opts)
    cmdline = "msgmerge %s %s %s" % (fmtopts, catpath, tplpath)
    mrgres = os.system(unicode_to_str(cmdline))
    if mrgres != 0:
        if abort:
            raise PologyError(
                _("@info",
                  "Cannot merge PO file '%(file1)s' with template '%(file2)s'.",
                  file1=catpath, file2=tplpath))
        return None if getcat else False

    # If the catalog had only header and no messages,
    # msgmerge will not write out anything.
    # In such case, just copy the initial file to output path.
    if outpath and not os.path.isfile(outpath):
        shutil.copyfile(catpath, outpath)

    # If both the output path has been given and update requested,
    # copy the output file over the initial file.
    if update and outpath and catpath != outpath:
        shutil.copyfile(outpath, catpath)

    # Post-process merged catalog if necessary.
    if (   getcat or otherwrap or correct_exact_matches
        or correct_fuzzy_matches or ignpotdate or rebase_existing_fuzzies
    ):
        # If fine wrapping requested and catalog should not be returned,
        # everything has to be reformatted, so no need to monitor the catalog.
        catpath1 = outpath or catpath
        monitored1 = monitored if getcat else (not otherwrap)
        cat = Catalog(catpath1, monitored=monitored1, wrapping=wrapping)

        # In case compendium is being used,
        # make fuzzy exact matches which do not pass the word limit.
        if correct_exact_matches:
            acc = cat.accelerator()
            for msg in cat:
                if (    msg.key in nontrkeys and msg.translated
                    and (   fuzzex
                         or len(proper_words(msg.msgid, accels=acc)) < minwnex)
                ):
                    msg.fuzzy = True

        # Eliminate fuzzy matches not passing the adjusted similarity limit.
        if correct_fuzzy_matches:
            for msg in cat:
                if msg.fuzzy and msg.msgid_previous is not None:
                    if editprob(msg.msgid_previous, msg.msgid) < minasfz:
                        msg.clear()

        # Revert template creation date change if it was the only change.
        if ignpotdate:
            fname = "POT-Creation-Date"
            orig_potdate = orig_cat.header.get_field_value(fname)
            new_potdate = cat.header.get_field_value(fname)
            cat.header.replace_field_value(fname, orig_potdate)
            if cat != orig_cat:
                # There were other changes; restore the new date.
                cat.header.replace_field_value(fname, new_potdate)

        # Remove dummy messages added for rebasing of fuzzy messages
        # that were obsoleted instead of promoted to fuzzy.
        if rebase_existing_fuzzies:
            for dmsg in rebase_dummy_messages:
                if dmsg in cat and cat[dmsg].obsolete:
                    cat.remove_on_sync(dmsg)

        if not getcat:
            # Force sync when fine wrapping must be applied by rewriting.
            cat.sync(force=otherwrap)

    return cat if getcat else True
def apply_ediff(op):
    """
    Apply an embedded-diff (ediff) PO file, given by command-line
    options C{op}, onto the catalogs it was made against.

    The ediff is read from C{op.input} if given, otherwise from
    standard input. Parts of the patch that cannot be applied are
    collected and written out as a rejects catalog.
    """

    # Read the ediff PO.
    dummy_stream_path = "<stdin>"
    if op.input:
        if not os.path.isfile(op.input):
            error(_("@info",
                    "Path '%(path)s' is not a file or does not exist.",
                    path=op.input))
        edfpath = op.input
        readfh = None
    else:
        edfpath = dummy_stream_path
        readfh = sys.stdin
    try:
        ecat = Catalog(edfpath, monitored=False, readfh=readfh)
    # NOTE(review): bare except hides the real parse error; consider
    # narrowing to the catalog's exception type.
    except:
        error(_("@info ediff is shorthand for \"embedded difference\"",
                "Error reading ediff '%(file)s'.",
                file=edfpath))

    # Split ediff by diffed catalog into original and new file paths,
    # header message, and ordinary messages.
    hmsgctxt = ecat.header.get_field_value(EDST.hmsgctxt_field)
    if hmsgctxt is None:
        error(_("@info",
                "Header field '%(field)s' is missing in the ediff.",
                field=EDST.hmsgctxt_field))
    edsplits = []
    cehmsg = None
    # Sentry message flushes the final section out of the loop below.
    smsgid = u"\x00"
    ecat.add_last(MessageUnsafe(dict(msgctxt=hmsgctxt, msgid=smsgid))) # sentry
    for emsg in ecat:
        if emsg.msgctxt == hmsgctxt:
            if cehmsg:
                # Record previous section.
                edsplits.append((fpaths, cehmsg, cemsgs))
                if emsg.msgid == smsgid: # end sentry, avoid parsing below
                    break

            # Mine original and new file paths out of header.
            fpaths = []
            for fpath in emsg.msgid.split("\n")[:2]:
                # Strip leading "+ "/"- "
                fpath = fpath[2:]
                # Convert to planform path separators.
                fpath = re.sub(r"/+", os.path.sep, fpath)
                # Remove revision indicator.
                p = fpath.find(EDST.filerev_sep)
                if p >= 0:
                    fpath = fpath[:p]
                # Strip path and append directory as requested.
                if op.strip:
                    preflen = int(op.strip)
                    lst = fpath.split(os.path.sep, preflen)
                    if preflen + 1 == len(lst):
                        fpath = lst[preflen]
                    else:
                        # Not enough path components to strip; keep only
                        # the base name.
                        fpath = os.path.basename(fpath)
                else:
                    fpath = os.path.basename(fpath)
                if op.directory and fpath:
                    fpath = os.path.join(op.directory, fpath)
                # All done.
                fpaths.append(fpath)

            cehmsg = emsg
            cemsgs = []
        else:
            cemsgs.append(emsg)

    # Prepare catalog for rejects and merges.
    rcat = Catalog("", create=True, monitored=False,
                   wrapping=ecat.wrapping())
    init_ediff_header(rcat.header, hmsgctxt=hmsgctxt, extitle="rejects")

    # Apply diff to catalogs.
    for fpaths, ehmsg, emsgs in edsplits:
        # Open catalog for patching.
        fpath1, fpath2 = fpaths
        if fpath1:
            # Diff from an existing catalog, open it.
            if not os.path.isfile(fpath1):
                warning(_("@info",
                          "Path '%(path)s' is not a file or does not exist, "
                          "skipping it.",
                          path=fpath1))
                continue
            try:
                cat = Catalog(fpath1)
            except:
                warning(_("@info",
                          "Error reading catalog '%(file)s', skipping it.",
                          file=fpath1))
                continue
        elif fpath2:
            # New catalog added in diff, create it (or open if it exists).
            try:
                mkdirpath(os.path.dirname(fpath2))
                cat = Catalog(fpath2, create=True)
                if cat.created():
                    cat.set_wrapping(ecat.wrapping())
            except:
                if os.path.isfile(fpath2):
                    # NOTE(review): message reports fpath1 here although the
                    # failing catalog is fpath2 — looks like a copy-paste
                    # slip; confirm before changing.
                    warning(_("@info",
                              "Error reading catalog '%(file)s', skipping it.",
                              file=fpath1))
                else:
                    warning(_("@info",
                              "Cannot create catalog '%(file)s', skipping it.",
                              file=fpath2))
                continue
        else:
            error(_("@info",
                    "Both catalogs in ediff indicated not to exist."))

        # Do not try to patch catalog with embedded differences
        # (i.e. previously patched using -e).
        if cat.header.get_field_value(EDST.hmsgctxt_field) is not None:
            warning(_("@info",
                      "Catalog '%(file)s' already contains "
                      "embedded differences, skipping it.",
                      file=cat.filename))
            continue

        # Do not try to patch catalog if the patch contains
        # unresolved split differences.
        if reduce(lambda r, x: r or _flag_ediff_to_new in x.flag,
                  emsgs, False):
            warning(_("@info",
                      "Patch for catalog '%(file)s' contains unresolved "
                      "split differences, skipping it.",
                      file=cat.filename))
            continue

        # Patch the catalog.
        rejected_ehmsg = patch_header(cat, ehmsg, ecat, op)
        rejected_emsgs_flags = patch_messages(cat, emsgs, ecat, op)
        any_rejected = rejected_ehmsg or rejected_emsgs_flags
        if fpath2 or any_rejected:
            created = cat.created()
            if cat.sync():
                if not created:
                    if any_rejected and op.embed:
                        report(_("@info:progress E is for \"with embedding\"",
                                 "Partially patched (E): %(file)s",
                                 file=cat.filename))
                    elif any_rejected:
                        report(_("@info:progress",
                                 "Partially patched: %(file)s",
                                 file=cat.filename))
                    elif op.embed:
                        report(_("@info:progress E is for \"with embedding\"",
                                 "Patched (E): %(file)s",
                                 file=cat.filename))
                    else:
                        report(_("@info:progress",
                                 "Patched: %(file)s",
                                 file=cat.filename))
                else:
                    if op.embed:
                        report(_("@info:progress E is for \"with embedding\"",
                                 "Created (E): %(file)s",
                                 file=cat.filename))
                    else:
                        report(_("@info:progress",
                                 "Created: %(file)s",
                                 file=cat.filename))
            else:
                pass #report("unchanged: %s" % cat.filename)
        else:
            # Whole-catalog removal: nothing survived the patch.
            os.unlink(fpath1)
            report(_("@info:progress",
                     "Removed: %(file)s",
                     file=fpath1))

        # If there were any rejects and reembedding is not in effect,
        # record the necessary to present them.
        if any_rejected and not op.embed:
            if not rejected_ehmsg:
                # Clean header diff.
                ehmsg.manual_comment = ehmsg.manual_comment[:1]
                ehmsg.msgstr[0] = u""
            rcat.add_last(ehmsg)
            for emsg, flag in rejected_emsgs_flags:
                # Reembed to avoid any conflicts.
                msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg)
                emsg = msg_ediff(msg1_s, msg2_s,
                                 emsg=msg2_s, ecat=rcat, enoctxt=hmsgctxt)
                if flag:
                    emsg.flag.add(flag)
                rcat.add_last(emsg)

    # If there were any rejects, write them out.
    if len(rcat) > 0:
        # Construct paths for embedded diffs of rejects.
        rsuff = "rej"
        if ecat.filename != dummy_stream_path:
            rpath = ecat.filename
            p = rpath.rfind(".")
            if p < 0:
                p = len(rpath)
            # Insert the reject suffix before the extension.
            rpath = rpath[:p] + (".%s" % rsuff) + rpath[p:]
        else:
            rpath = "stdin.%s.po" % rsuff
        rcat.filename = rpath
        rcat.sync(force=True, noobsend=True)
        report(_("@info:progress file to which rejected parts of the patch "
                 "have been written to",
                 "*** Rejects: %(file)s",
                 file=rcat.filename))
def _norm_ui_cat (cat, xmlescape):
    """
    Produce a normalized copy of catalog C{cat} for UI-reference lookup:
    markup and accelerator markers are removed from non-obsolete messages,
    and messages that collide on the normalized key but differ in
    translation get a hash-based disambiguation appended to their context.
    Optionally all text fields are XML-escaped.
    """
    norm_cat = Catalog("", create=True, monitored=False)
    norm_cat.filename = cat.filename + "~norm"

    # Normalize messages and collect them by normalized keys.
    msgs_by_normkey = {}
    for msg in cat:
        if msg.obsolete:
            continue
        # Keep the key as it was before normalization,
        # to base disambiguation on the original text.
        orig_msgkey = (msg.msgctxt, msg.msgid)
        remove_markup_msg(msg, cat) # before accelerator removal
        remove_accel_msg(msg, cat) # after markup removal
        normkey = (msg.msgctxt, msg.msgid)
        if normkey not in msgs_by_normkey:
            msgs_by_normkey[normkey] = []
        msgs_by_normkey[normkey].append((msg, orig_msgkey))

    for msgs in msgs_by_normkey.values():
        # If there are several messages with same normalized key and
        # different translations, add extra disambiguations to context.
        # These disambiguations must not depend on message ordering.
        if len(msgs) > 1:
            # Check equality of translations.
            msgstr0 = u""
            for msg, d1 in msgs:
                if msg.translated:
                    if not msgstr0:
                        msgstr0 = msg.msgstr[0]
                    elif msgstr0 != msg.msgstr[0]:
                        # At least two differing translations found.
                        msgstr0 = None
                        break
            if msgstr0 is None: # disambiguation necessary
                tails = set()
                for msg, (octxt, omsgid) in msgs:
                    if msg.msgctxt is None:
                        msg.msgctxt = u""
                    # Hash of the original (pre-normalization) msgid gives
                    # an ordering-independent disambiguation source.
                    tail = hashlib.md5(omsgid).hexdigest()
                    n = 4 # minimum size of the disambiguation tail
                    # Grow the tail until it is unique among this group.
                    while tail[:n] in tails:
                        n += 1
                        if n > len(tail):
                            raise PologyError(
                                _("@info",
                                  "Hash function has returned same result "
                                  "for two different strings."))
                    tails.add(tail[:n])
                    msg.msgctxt += "~" + tail[:n]
            else: # all messages have same translation, use first
                msgs = msgs[:1]

        # Escape text fields.
        if xmlescape:
            for msg, d1 in msgs:
                if msg.msgctxt:
                    msg.msgctxt = _escape_to_xml(msg.msgctxt)
                msg.msgid = _escape_to_xml(msg.msgid)
                if msg.msgid_plural:
                    msg.msgid_plural = _escape_to_xml(msg.msgid_plural)
                for i in range(len(msg.msgstr)):
                    msg.msgstr[i] = _escape_to_xml(msg.msgstr[i])

        # Add normalized messages to normalized catalog.
        for msg, d1 in msgs:
            # Skip messages normalized down to nothing (e.g. pure markup).
            if msg.msgctxt or msg.msgid:
                norm_cat.add_last(msg)

    return norm_cat