def msg_patchable(msg, msg1, msg2):
    """
    Decide whether the diff pair (old msg1, new msg2) can be applied
    onto the current message msg.
    """
    # Normally the message itself is compared against the diff pair, but
    # for some mismatches a transformed copy can still merge cleanly.
    candidate = msg

    if (msg and msg.fuzzy and msg.key_previous is not None
            and msg1 and not msg1.fuzzy and msg2 and not msg2.fuzzy):
        # Old and new are translated, but current is fuzzy and has previous
        # fields. Compare against current's reconstructed previous state,
        # from which it may have become fuzzy by merging with templates.
        candidate = MessageUnsafe(msg)
        msg_copy_fields(msg, candidate, MPC.prevcurr_fields)
        msg_clear_prev_fields(candidate)
        candidate.fuzzy = False
    elif msg and msg.untranslated and not msg1 and msg2 and msg2.translated:
        # Old is None, new is translated, and current is untranslated.
        # Compare as if new's translation were filled into current, since it
        # may have been added as untranslated after merging with templates.
        candidate = MessageUnsafe(msg)
        msg_copy_fields(msg2, candidate, ["msgstr"])

    # NOTE: the and/or forms below are kept exactly — they may return the
    # falsy message object itself (e.g. None) rather than a strict bool.
    if msg1 and msg2:
        return msg and candidate.inv in (msg1.inv, msg2.inv)
    elif msg2:
        return not msg or candidate.inv == msg2.inv
    elif msg1:
        return not msg or candidate.inv == msg1.inv
    else:
        return not msg
def process (self, msg, cat):
    """
    Run all current checks on a translated message and report problems.
    """
    if not msg.translated:
        return

    problem_spans = []

    # Some messages embed the context in msgid; split it out into a
    # proper msgctxt on a working copy.
    if _ctxtsep in msg.msgid:
        ctxt, _sep, pure_msgid = msg.msgid.partition(_ctxtsep)
        msg = MessageUnsafe(msg) # should not modify original message
        msg.msgctxt = ctxt
        msg.msgid = pure_msgid

    # Each check appends its findings to problem_spans and returns
    # the number of problems found.
    for check in self.current_checks:
        self.nproblems += check(msg, cat, False, problem_spans)

    if not problem_spans:
        return
    if self.showmsg:
        report_msg_content(msg, cat, highlight=problem_spans, delim=("-" * 20))
    else:
        report_on_msg_hl(problem_spans, msg, cat)
    if self.lokalize:
        report_msg_to_lokalize(msg, cat, problem_spans)
def clear_header_metadata(ehmsg):
    """
    Return a copy of the ediff header message with catalog-identifying
    metadata removed: the top separator comment, the msgid, and the
    header context.
    """
    stripped = MessageUnsafe(ehmsg)
    stripped.manual_comment.pop(0)
    stripped.msgid = u""
    stripped.msgctxt = None
    return stripped
def msg_copy_fields(m1, m2, fields):
    """
    Copy the given fields from message m1 into message m2.

    Each element of fields is either a single field name (copied under
    the same name) or a (source, destination) name pair. A None source
    message acts as an empty message, resetting destination fields.
    """
    source = m1 if m1 is not None else MessageUnsafe()
    for spec in fields:
        src_field, dst_field = spec if isinstance(spec, tuple) else (spec, spec)
        setattr(m2, dst_field, source.get(src_field))
def _msg_invert_cp(msg):
    """
    Return a lightweight copy of the message with current and previous
    key fields swapped; None input passes through as None.
    """
    if msg is None:
        return None
    lmsg = MessageUnsafe()
    if msg.key_previous is not None:
        # Need to invert only key fields, but whadda hell.
        for fcurr, fprev in MPC.currprev_fields:
            setattr(lmsg, fcurr, msg.get(fprev))
            setattr(lmsg, fprev, msg.get(fcurr))
    else:
        # set_key() mutates lmsg in place; its result must not be
        # returned directly (previously `return lmsg.set_key(msg)` made
        # this branch return None and left the final return unreachable).
        lmsg.set_key(msg)
    return lmsg
def patch_header(cat, ehmsg, ecat, options):
    """
    Apply the header part of an ediff onto the catalog's header.

    Returns None when the header diff was applied (or there was nothing
    to apply), or the ediff header message itself when it was rejected.
    """
    if not ehmsg.msgstr[0]: # no header diff, only metadata
        return None
    # Strip ediff bookkeeping from the header message before resolving.
    ehmsg_clean = clear_header_metadata(ehmsg)

    # Create reduced headers.
    hmsg1 = msg_ediff_to_old(ehmsg_clean)
    hmsg2 = msg_ediff_to_new(ehmsg_clean)
    # and/or idiom: a real current header only if the catalog existed before.
    hmsg = not cat.created() and cat.header.to_msg() or None
    hdrs = []
    for m in (hmsg, hmsg1, hmsg2):
        h = m is not None and reduce_header_fields(Header(m)) or None
        hdrs.append(h)
    rhdr, rhdr1, rhdr2 = hdrs

    # Decide if the header can be cleanly patched.
    clean = False
    if not rhdr:
        # No current header (new catalog): patchable if diff has any side.
        clean = rhdr1 or rhdr2
    else:
        # Patchable if current header equals either side of the diff.
        clean = (rhdr1 and rhdr == rhdr1) or (rhdr2 and rhdr == rhdr2)

    if clean:
        if not options.embed:
            if hmsg2:
                cat.header = Header(hmsg2)
            else:
                # Catalog will be removed if no messages are rejected,
                # and otherwise the header should stay as-is.
                pass
        else:
            # Embedding requested: keep the diff inside the catalog itself,
            # as a specially-flagged message at position 0.
            if cat.created():
                cat.header = Header(hmsg2)
            ehmsg = MessageUnsafe(ehmsg)
            ehmsg.flag.add(_flag_ediff)
            hmsgctxt = get_msgctxt_for_headers(cat)
            ehmsg.msgctxt = hmsgctxt
            cat.header.set_field(EDST.hmsgctxt_field, hmsgctxt)
            cat.add(Message(ehmsg), 0)
        return None
    else:
        return ehmsg
def translate_direct(paths, tsbuilder, options):
    """
    Machine-translate messages of the given catalogs in place.

    paths are catalog file paths, tsbuilder constructs the translation
    service, options is the parsed command-line options object.
    """
    # NOTE(review): transervs appears unused in this function — presumably
    # a leftover or a cache consulted elsewhere; confirm before removing.
    transervs = {}

    catpaths = collect_catalogs(paths)
    for catpath in catpaths:
        # Collect messages and texts to translate.
        cat = Catalog(catpath)
        if options.accel is not None: # force explicitly given accelerator
            cat.set_accelerator(options.accel)
        texts = []
        msgs = []
        for msg in cat:
            if to_translate(msg, options):
                msgf = MessageUnsafe(msg)
                remove_accel_msg(msgf, cat)
                texts.append(msgf.msgid)
                if msg.msgid_plural is not None:
                    texts.append(msgf.msgid_plural)
                msgs.append(msg)

        # Translate collected texts.
        slang = options.slang or "en"
        transerv = get_transerv(slang, options.tlang, cat, cat, tsbuilder)
        texts_tr = transerv.translate(texts) if texts else []
        if texts_tr is None:
            warning(_("@info",
                      "Translation service failure on '%(file)s'.",
                      file=catpath))
            continue
        for i, text in enumerate(texts_tr):
            text = reduce_for_encoding(text, cat.encoding())
            texts_tr[i] = text

        # Put translated texts into messages.
        # IMPORTANT: texts_tr is consumed positionally via pop(0); the order
        # must exactly mirror the order in which texts were appended above
        # (msgid, then msgid_plural when present, per message).
        singlepls = cat.plural_indices_single()
        for msg in msgs:
            msgid_tr = texts_tr.pop(0)
            if msg.msgid_plural is not None:
                msgid_plural_tr = texts_tr.pop(0)
            if msgid_tr:
                if msg.msgid_plural is not None:
                    # Singular-like plural forms get the singular translation.
                    for i in range(len(msg.msgstr)):
                        if i in singlepls:
                            msg.msgstr[i] = msgid_tr
                        else:
                            msg.msgstr[i] = msgid_plural_tr
                else:
                    msg.msgstr[0] = msgid_tr
                decorate(msg, options)

        sync_rep(cat, msgs)
def make_filtered_msg(msg, cat, accels=None, filters=()):
    """
    Create a filtered copy of the message for checking.

    Accelerator markers are removed (switching the catalog to the
    explicitly given accelerator characters for the duration, when
    accels is not None), and each filter in filters is applied to
    every msgstr in turn. The original message is left untouched.

    Note: the filters default was a mutable list ([]); changed to an
    immutable tuple — it is only iterated, so callers are unaffected.
    """
    # Must not modify contents of real message.
    msgf = MessageUnsafe(msg)

    # - remove accelerators
    if accels is not None:
        old_accels = cat.accelerator()
        cat.set_accelerator(accels)
    remove_accel_msg(msgf, cat)
    if accels is not None:
        cat.set_accelerator(old_accels)

    # - apply msgstr filters
    for filtr in filters:
        for i in range(len(msgf.msgstr)):
            msgf.msgstr[i] = filtr(msgf.msgstr[i])

    return msgf
def diff_hdrs(hdr1, hdr2, vpath1, vpath2, hmsgctxt, ecat, colorize):
    """
    Create the ediff message for a pair of catalog headers.

    Returns (ehmsg, anydiff) where ehmsg is the ediff header message and
    anydiff is True when the headers actually differed.
    """
    # Convert headers to messages; base the ediff on a copy of the new
    # header when it exists, so non-diffed parts are preserved.
    hmsg1, hmsg2 = [
        x and MessageUnsafe(x.to_msg()) or None for x in (hdr1, hdr2)
    ]
    ehmsg = hmsg2 and MessageUnsafe(hmsg2) or None
    ehmsg, dr = msg_ediff(hmsg1, hmsg2, emsg=ehmsg, ecat=ecat,
                          colorize=colorize, diffr=True)
    if dr == 0.0:
        # Revert to empty message if no difference between headers.
        ehmsg = MessageUnsafe()
    # Add visual paths as old/new segments into msgid.
    vpaths = [vpath1, vpath2]
    # Always use slashes as path separator, for portability of ediffs.
    vpaths = [x.replace(os.path.sep, "/") for x in vpaths]
    ehmsg.msgid = u"- %s\n+ %s" % tuple(vpaths)
    # Add trailing newline if msgstr has it, again to appease msgfmt.
    if ehmsg.msgstr[0].endswith("\n"):
        ehmsg.msgid += "\n"
    # Add context identifying the diffed message as header.
    ehmsg.msgctxt = hmsgctxt
    # Add conspicuous separator at the top of the header.
    ehmsg.manual_comment.insert(0, u"=" * 76)
    return ehmsg, dr > 0.0
def resolve_diff_pair(emsg):
    """
    Recover the old and new message from an ediff message.

    Returns (msg1, msg2, msg1_s, msg2_s): the resolved old and new
    messages (after special-pairing adjustments) and their straight
    resolutions from the diff.

    Case naming below: f=fuzzy, nf=non-fuzzy; ecc/necc/ecp/necp
    presumably refer to (non-)existing current/previous fields, per the
    matching conditions — TODO confirm against the diffing counterpart.
    """
    # Recover old and new message according to diff.
    # Resolve into copies of ediff message, to preserve non-inv parts.
    emsg1 = MessageUnsafe(emsg)
    msg1_s = msg_ediff_to_old(emsg1, rmsg=emsg1)
    emsg2 = MessageUnsafe(emsg)
    msg2_s = msg_ediff_to_new(emsg2, rmsg=emsg2)

    # Resolve any special pairings.
    msg1, msg2 = msg1_s, msg2_s
    if not msg1_s or not msg2_s:
        # No special cases if either message non-existant.
        pass
    # Cases f-nf-*: fuzzy old, non-fuzzy new.
    elif msg1_s.fuzzy and not msg2_s.fuzzy:
        # Case f-nf-ecc.
        if (msg2_s.key_previous is None
                and not msg_eq_fields(msg1_s, msg2_s, MPC.curr_fields)):
            msg1 = MessageUnsafe(msg1_s)
            msg_copy_fields(msg1_s, msg1, MPC.currprev_fields)
            msg_copy_fields(msg2_s, msg1, MPC.curr_fields)
        # Case f-nf-necc.
        elif msg2_s.key_previous is not None:
            msg1 = MessageUnsafe(msg1_s)
            msg2 = MessageUnsafe(msg2_s)
            msg_copy_fields(msg2_s, msg1, MPC.prevcurr_fields)
            msg_clear_prev_fields(msg2)
    # Cases nf-f-*: non-fuzzy old, fuzzy new.
    elif not msg1_s.fuzzy and msg2_s.fuzzy:
        # Case nf-f-ecp.
        if (msg1_s.key_previous is None
                and not msg_eq_fields(msg1_s, msg2_s, MPC.curr_fields)):
            msg2 = MessageUnsafe(msg2_s)
            msg_copy_fields(msg1_s, msg2, MPC.currprev_fields)
        # Case nf-f-necp.
        elif msg1_s.key_previous is not None:
            msg1 = MessageUnsafe(msg1_s)
            msg2 = MessageUnsafe(msg2_s)
            msg_copy_fields(msg1_s, msg2, MPC.prev_fields)
            msg_clear_prev_fields(msg1)

    return msg1, msg2, msg1_s, msg2_s
def _add_msg_diff(msg1, msg2, ecat, colorize, fnsyn=None): # Skip diffing if old and new messages are "same". if msg1 and msg2 and msg1.inv == msg2.inv: return 0 # Create messages for special pairings. msg1_s, msg2_s = _create_special_diff_pair(msg1, msg2) # Create the diff. tmsg = msg2 or msg1 emsg = msg2_s or msg1_s if emsg is tmsg: emsg = MessageUnsafe(tmsg) emsg = msg_ediff(msg1_s, msg2_s, emsg=emsg, ecat=ecat, colorize=colorize) # Add to the diff catalog. if fnsyn is None: ecat.add(emsg, len(ecat)) else: ecat.add(emsg, srefsyn=fnsyn) return 1
def _create_special_diff_pair(msg1, msg2):
    """
    Adjust an old/new message pair for diffing when fuzzy state and
    previous fields require special treatment.

    Returns possibly modified copies (msg1_s, msg2_s); inputs are never
    mutated. Case naming: f=fuzzy, nf=non-fuzzy; ecc/necc/ecp/necp
    presumably mark (non-)equal current/previous fields, matching the
    msg_eq_fields conditions — TODO confirm against the resolver.
    """
    msg1_s, msg2_s = msg1, msg2
    if not msg1 or not msg2:
        # No special cases if either message non-existant.
        pass
    # Cases f-nf-*: fuzzy old with previous fields, non-fuzzy new.
    elif msg1.fuzzy and msg1.key_previous is not None and not msg2.fuzzy:
        # Case f-nf-ecc.
        if msg_eq_fields(msg1, msg2, MPC.curr_fields):
            msg1_s = MessageUnsafe(msg1)
            msg_copy_fields(msg1, msg1_s, MPC.prevcurr_fields)
            msg_clear_prev_fields(msg1_s)
        # Case f-nf-necc.
        else:
            msg1_s = MessageUnsafe(msg1)
            msg2_s = MessageUnsafe(msg2)
            msg_copy_fields(msg1, msg1_s, MPC.prevcurr_fields)
            msg_copy_fields(msg1, msg2_s, MPC.currprev_fields)
    # Cases nf-f-*: non-fuzzy old, fuzzy new with previous fields.
    elif not msg1.fuzzy and msg2.fuzzy and msg2.key_previous is not None:
        # Case nf-f-ecp.
        if msg_eq_fields(msg1, msg2, MPC.currprev_fields):
            msg2_s = MessageUnsafe(msg2)
            msg_clear_prev_fields(msg2_s)
        # Case nf-f-necp.
        else:
            msg1_s = MessageUnsafe(msg1)
            msg2_s = MessageUnsafe(msg2)
            msg_copy_fields(msg2, msg1_s, MPC.prev_fields)
            msg_copy_fields(msg2, msg2_s, MPC.currprev_fields)
    return msg1_s, msg2_s
def process(self, msg, cat):
    """
    Count one message's words and characters into self.count, honoring
    branch/line/entry restrictions, metamessage detection, optional
    msgstr filters, and fuzzy-difference splitting.
    """
    # Summit: if branches were given, skip the message if it does not
    # belong to any of the given branches.
    if self.p.branch:
        msg_branches = parse_summit_branches(msg)
        if not set.intersection(self.p.branch, msg_branches):
            return

    # If line/entry spans given, skip message if not in range.
    if self.lspan[0] is not None and msg.refline < self.lspan[0]:
        return
    if self.lspan[1] is not None and msg.refline >= self.lspan[1]:
        return
    if self.espan[0] is not None and msg.refentry < self.espan[0]:
        return
    if self.espan[1] is not None and msg.refentry >= self.espan[1]:
        return

    # Decide if a metamessage:
    ismeta = False
    # - msgid in form "@@<tag>: ..." from xml2po
    if msg.msgid.startswith("@@"):
        ps = msg.msgid.find(":")
        ismeta = (ps >= 0 and msg.msgid[2:ps].isalnum())
    # - translator credits from xml2po and xml2pot
    if (msg.msgid in self.xml2po_meta_msgid
            or msg.msgid in self.xml2pot_meta_msgid):
        ismeta = True
    # - translator credits in KDE GUI
    if msg.msgctxt in self.kde_meta_msgctxt:
        ismeta = True

    # Prepare filtered message for counting (work on a copy).
    if self.pfilters:
        msg = MessageUnsafe(msg)
        for pfilter in self.pfilters:
            for i in range(len(msg.msgstr)):
                msg.msgstr[i] = pfilter(msg.msgstr[i])

    # Count the words and characters in original and translation.
    # Remove shortcut markers prior to counting; don't include words
    # which do not start with a letter; remove scripted part.
    # For plural messages compute averages of msgid and msgstr groups,
    # to normalize comparative counts on varying number of plural forms.
    nwords = {"orig": 0, "tran": 0}
    nchars = {"orig": 0, "tran": 0}
    msgids = [msg.msgid]
    if msg.msgid_plural is not None:
        msgids.append(msg.msgid_plural)
    for src, texts in (("orig", msgids), ("tran", msg.msgstr)):
        if ismeta: # consider metamessages as zero counts
            continue
        lnwords = [] # this group's word count, for averaging
        lnchars = [] # this group's character count, for averaging
        for text in texts:
            pf = text.find("|/|")
            if pf >= 0:
                text = text[0:pf]
            words = proper_words(text, True, cat.accelerator(), msg.format)
            # If there are no proper words but there are some characters,
            # set to one empty word in order for a fuzzy or
            # an untranslated message not to be considered translated
            # when only word counts are observed.
            if not words and text:
                words = [""]
            lnwords.append(len(words))
            lnchars.append(len("".join(words)))
        nwords[src] += int(round(float(sum(lnwords)) / len(texts)))
        nchars[src] += int(round(float(sum(lnchars)) / len(texts)))
        #nchars[src] += (nwords[src] - 1) # nominal space per each two words

    # If the number of words has been limited, skip the message if it
    # does not fall in the range.
    if self.p.maxwords is not None:
        if not (nwords["orig"] <= self.p.maxwords
                or nwords["tran"] <= self.p.maxwords):
            return
    if self.p.minwords is not None:
        if not (nwords["orig"] >= self.p.minwords
                or nwords["tran"] >= self.p.minwords):
            return

    # Split word and character counts in fuzzy original if requested.
    nswords = {}
    nschars = {}
    if self.p.ondiff and msg.fuzzy and msg.msgid_previous is not None:
        diff, dr = tdiff(msg.msgid_previous, msg.msgid, diffr=True)
        # Reduce difference ratio to a smaller range by some threshold.
        # Texts more different than the threshold need full review.
        drth = 0.4
        #dr2 = dr if dr < drth else 1.0
        dr2 = min(dr / drth, 1.0)
        # Split counts between primary fuzzy count, and secondary
        # translated, so that total remains the same.
        nswords.update({"trn": {}, "fuz": {}, "unt": {}})
        nschars.update({"trn": {}, "fuz": {}, "unt": {}})
        for nitems, nitems2, src in (
            (nwords, nswords, "orig"), (nwords, nswords, "tran"),
            (nchars, nschars, "orig"), (nchars, nschars, "tran"),
        ):
            num = nitems[src]
            # Difference ratio of 0 can happen if the new and old texts
            # are the same, normally when only the context has changed.
            # Fuzzy counts should not be totally eliminated then,
            # as it should be seen that message needs updating.
            if dr2 > 0.0:
                rnum = int(round(dr2 * num + 0.5)) # round up
            else:
                rnum = 1
            rnum = min(rnum, num) # in case of rounding overflow
            # FIX: the reviewable share belongs to the fuzzy category;
            # nothing of a fuzzy message is untranslated. (Previously rnum
            # was recorded under "unt" and "fuz" got 0, inverting the
            # fuzzy/untranslated statistics and contradicting the comment
            # above about splitting between fuzzy and translated.)
            nitems2["trn"][src] = num - rnum
            nitems2["fuz"][src] = rnum
            nitems2["unt"][src] = 0

    # Detect categories and add the counts.
    categories = set()

    if not msg.obsolete: # do not count obsolete into totals
        self.count["tot"][0] += 1
        categories.add("tot")
        if nswords:
            categories.update(nswords.keys())

    if msg.obsolete: # do not split obsolete into fuzzy/translated
        self.count["obs"][0] += 1
        categories.add("obs")
        nswords = {}
        nschars = {}
    elif msg.translated:
        self.count["trn"][0] += 1
        categories.add("trn")
    elif msg.fuzzy:
        self.count["fuz"][0] += 1
        categories.add("fuz")
        if cat.filename not in self.incomplete_catalogs:
            self.incomplete_catalogs[cat.filename] = True
    elif msg.untranslated:
        self.count["unt"][0] += 1
        categories.add("unt")
        if cat.filename not in self.incomplete_catalogs:
            self.incomplete_catalogs[cat.filename] = True

    # Add counts to all detected categories. (Loop variable renamed from
    # `cat`, which shadowed the catalog parameter.)
    for ctg in categories:
        nwords1 = nswords.get(ctg, nwords)
        nchars1 = nschars.get(ctg, nchars)
        self.count[ctg][1] += nwords1["orig"]
        self.count[ctg][2] += nwords1["tran"]
        self.count[ctg][3] += nchars1["orig"]
        self.count[ctg][4] += nchars1["tran"]
def cats_update_effort(cat1, cat2, upprogf=None):
    """
    Estimate the effort of updating translation from old catalog cat1
    to new catalog cat2, as total nominal number of newly translated
    words (NNTW) over all paired messages; upprogf is an optional
    progress callback invoked once per pair.
    """
    upprogf = upprogf or (lambda: None)
    dpairs = _pair_msgs(cat1, cat2,
                        merge=True,
                        wrem=False,
                        wadd=True,
                        noobs=False,
                        upprogf=upprogf)
    nntw_total = 0
    for msg1, msg2 in dpairs:
        upprogf()
        if not msg2.active:
            continue
        if msg1 is None:
            # Unpaired new message: treat old side as empty message.
            msg1 = MessageUnsafe()
        # The update effort of the given old-new message pair is equal
        # to "nominal number of newly translated words" (NNTW),
        # which is defined as follows:
        # - nominal length of a word in msgid is set to 6 characters (WL).
        # - number of characters in new msgid is divided by WL
        #   to give nominal number of words in new msgid (NWO)
        # - number of equal characters in old and new msgid is divided by WL
        #   to give nominal number of equal words in msgid (NEWO)
        # - number of characters in new msgstr is divided by number of
        #   characters in new msgid to give translation expansion factor (EF)
        # - number of equal characters in old and new msgstr is divided
        #   by WL*EF to give nominal number of equal words in msgstr (NEWT)
        # - character-based similarity ratio of old and new msgid
        #   (from 0.0 for no similarity to 1.0 for equality) is computed (SRO)
        # - character-based similarity ratio of old and new msgstr
        #   is computed (SRT)
        # - similarity ratio threshold is set to 0.5 (SRB)
        # - reduction due to similiarity factor is computed as
        #   RSF = (min(SRO, SRT) - SRB) / (1 - SRB)
        # - nominal number of newly translated words is computed as
        #   NNTW = min(NWO - max(NEWO, NEWT) * RSF, NWO)
        #
        # Only those pairs where the new message is active are counted in.
        #
        # On plural messages, for the moment only msgid and msgstr[0]
        # are considered, and the above procedured applied to them.
        # This underestimates the effort of updating a new plural message
        # when old message was ordinary.
        #
        # NOTE(review): ef is zero for an empty msgstr[0], and len(msg2.msgid)
        # may be zero, either of which would raise ZeroDivisionError below —
        # presumably active messages here always have non-empty msgid and
        # msgstr; confirm.
        wl = 6.0
        nwo = len(msg2.msgid) / wl
        diffo, dro = tdiff(msg1.msgid, msg2.msgid, diffr=True)
        newo = len([c for t, c in diffo if t == " "]) / wl
        ef = float(len(msg2.msgstr[0])) / len(msg2.msgid)
        difft, drt = tdiff(msg1.msgstr[0], msg2.msgstr[0], diffr=True)
        newt = len([c for t, c in difft if t == " "]) / (wl * ef)
        sro = 1.0 - dro
        srt = 1.0 - drt
        srb = 0.5
        rsf = (min(sro, srt) - srb) / (1.0 - srb)
        nntw = max(min(nwo - max(newo, newt) * rsf, nwo), 0.0)
        nntw_total += nntw
    return nntw_total
def hybdl(path, path0, accnohyb=False):
    """
    Hybridize translations in the catalog at path against the original
    catalog at path0.

    Returns the number of hybridized messages, or 0 when any message
    could not be cleanly hybridized (the catalog is then not synced).
    """
    cat = Catalog(path)
    cat0 = Catalog(path0, monitored=False)

    nhybridized = 0
    nstopped = 0
    for msg in cat:
        if "no-hybdl" in manc_parse_flag_list(msg, "|"):
            continue

        # Unembed diff if message was diffed for review.
        # Replace ediff with manual review flag.
        # FIX: iterate over a snapshot — removing from msg.flag while
        # iterating it directly mutates the collection mid-iteration.
        diffed = False
        for flag in list(msg.flag):
            if flag.startswith("ediff"):
                msg.flag.remove(flag)
                diffed = True
        if diffed:
            msg_ediff_to_new(msg, msg)
            msg.flag.add(u"reviewed")

        # Fetch original message.
        msg0 = cat0.get(msg)
        if msg0 is None:
            warning_on_msg(
                _("@info",
                  "Message does not exist in the original catalog."),
                msg, cat)
            nstopped += 1
            continue
        if len(msg.msgstr) != len(msg0.msgstr):
            warning_on_msg(
                _("@info",
                  "Number of translations not same as in "
                  "the original message."), msg, cat)
            nstopped += 1
            continue
        if msg.msgstr == msg0.msgstr:
            # No changes, nothing new to hybridize.
            continue

        # Hybridize translation, in both directions unless accnohyb;
        # accept only if both directions agree.
        textsh = []
        textshinv = []
        for text0, text in zip(msg0.msgstr, msg.msgstr):
            texth = tohi(text0, text, parthyb=True)
            textsh.append(texth)
            if not accnohyb:
                texthinv = tohi(text, text0, parthyb=True)
                textshinv.append(texthinv)
        if accnohyb or textsh == textshinv:
            for i, texth in zip(range(len(msg.msgstr)), textsh):
                msg.msgstr[i] = texth
            nhybridized += 1
        else:
            # Report the disagreement as an embedded diff.
            nstopped += 1
            msgh = MessageUnsafe(msg)
            msgh.msgstr = textsh
            msghinv = MessageUnsafe(msg)
            msghinv.msgstr = textshinv
            msg_ediff(msghinv, msgh, emsg=msgh, colorize=True)
            report_msg_content(msgh, cat, delim=("-" * 20))

    if nstopped == 0:
        if cat.sync():
            report("! %s (%d)" % (path, nhybridized))
    else:
        warning(n_("@info",
                   "%(num)d message in '%(file)s' cannot be "
                   "cleanly hybridized.",
                   "%(num)d messages in '%(file)s' cannot be "
                   "cleanly hybridized.",
                   num=nstopped, file=path))
        nhybridized = 0

    return nhybridized
def process (self, msg, cat):
    """
    Check one translated message against all loaded rules, with optional
    per-catalog XML output and its timestamp-validated cache.
    """
    # Apply rules only on translated messages.
    if not msg.translated:
        return
    # Apply rules only to messages from selected branches.
    if self.branches:
        msg_branches = parse_summit_branches(msg)
        if not set.intersection(self.branches, msg_branches):
            return

    filename = basename(cat.filename)

    # New file handling
    if self.xmlFile and self.filename != filename:
        newFile = True
        self.cached = False # Reset flag
        self.cachePath = join(_CACHEDIR,
                              abspath(cat.filename).replace("/", _MARSHALL))
        if self.cacheFile:
            self.cacheFile.close()
        if self.filename != "":
            # close previous
            self.xmlFile.write("</po>\n")
        self.filename = filename
    else:
        newFile = False

    # Current file loaded from cache on previous message. Close and return
    if self.cached:
        # No need to analyze message, return immediately
        if self.cacheFile:
            self.cacheFile = None # Indicate cache has been used and flushed into xmlFile
        return

    # Does cache exist for this file ?
    if self.xmlFile and newFile and exists(self.cachePath):
        poDate = None
        for headerName, headerValue in cat.header.field:
            if headerName == "PO-Revision-Date":
                poDate = headerValue
                break
        if poDate:
            # Truncate daylight information.
            # NOTE: rstrip("GMT") strips trailing 'G'/'M'/'T' characters
            # (a character set, not the literal suffix).
            poDate = poDate.rstrip("GMT")
            poDate = poDate[0:poDate.find("+")]
            # Convert in sec since epoch time format
            poDate = mktime(strptime(poDate, '%Y-%m-%d %H:%M'))
            # Use the cache only when it is newer than the catalog revision.
            if os.stat(self.cachePath)[8] > poDate:
                if self.ruleinfo:
                    report(_("@info:progress", "Using cache."))
                # NOTE(review): three-argument open looks like codecs.open
                # (mode, encoding) — confirm the module-level import.
                self.xmlFile.writelines(
                    open(self.cachePath, "r", "utf-8").readlines())
                self.cached = True

    # No cache available, create it for next time
    if self.xmlFile and newFile and not self.cached:
        if self.ruleinfo:
            report(_("@info", "No cache available, processing file."))
        self.cacheFile = open(self.cachePath, "w", "utf-8")

    # Handle start/end of files for XML output (not needed for text output)
    if self.xmlFile and newFile:
        # open new po
        if self.cached:
            # We can return now, cache is used, no need to process catalog
            return
        else:
            poTag = '<po name="%s">\n' % filename
            self.xmlFile.write(poTag) # Write to result
            self.cacheFile.write(poTag) # Write to cache

    # Collect explicitly ignored rules by ID for this message.
    locally_ignored = manc_parse_list(msg, "skip-rule:", ",")
    # Collect explicitly applied rules by ID for this message.
    locally_applied = manc_parse_list(msg, "apply-rule:", ",")

    # Collect ignored/applied rules by switching comment.
    swprefix = "switch-rule:"
    swsep = ">"
    for cmnt in msg.manual_comment:
        if cmnt.strip().startswith(swprefix):
            p1 = cmnt.find(swprefix) + len(swprefix)
            p2 = cmnt.find(swsep, p1)
            if p2 < 0:
                raise SieveMessageError(
                    _("@info",
                      "Separator character '%(sep)s' missing in "
                      "'%(prefix)s' comment.",
                      sep=swsep, prefix=swprefix))
            els1 = [x.strip() for x in cmnt[p1:p2].split(",")]
            els2 = [x.strip() for x in cmnt[p2 + len(swsep):].split(",")]
            locally_ignored.extend(x for x in els1 if x)
            locally_applied.extend(x for x in els2 if x)

    # NOTE: It would be nice to warn if an explicitly applied rule
    # is not defined, but this is not generally possible because
    # different rule files may be loaded for different runs.

    # Prepare filtered messages for checking.
    envSet = set(self.envs)
    msgByFilter = {}
    for mfilter in self.ruleFilters:
        if mfilter is not None:
            msgf = MessageUnsafe(msg)
            mfilter(msgf, cat, envSet)
        else:
            msgf = msg
        msgByFilter[mfilter] = msgf

    # Now the sieve itself.
    # FIX: the next line had lost its leading '#' ("Check message with
    # every rules" as bare code), which was a syntax error.
    # Check message with every rule.
    failedRules = []
    for rule in self.rules:
        if rule.disabled:
            continue
        if rule.environ and rule.environ not in envSet:
            continue
        if rule.ident in locally_ignored:
            continue
        if rule.manual and not rule.ident in locally_applied:
            continue
        msgf = msgByFilter[rule.mfilter]
        try:
            spans = rule.process(msgf, cat, envs=envSet, nofilter=True)
        except TimedOutException:
            warning(_("@info:progress",
                      "Rule '%(rule)s' timed out, skipping it.",
                      rule=rule.rawPattern))
            continue
        if spans:
            self.nmatch += 1
            if self.xmlFile:
                # FIXME: rule_xml_error is actually broken,
                # as it considers matching to always be on msgstr
                # Multiple span are now supported as well as msgstr index
                # Now, write to XML file if defined
                rspans = [x[:2] for x in spans[0][2]]
                pluid = spans[0][1]
                xmlError = rule_xml_error(msg, cat, rule, rspans, pluid)
                self.xmlFile.writelines(xmlError)
                if not self.cached:
                    # Write result in cache
                    self.cacheFile.writelines(xmlError)
            if not self.showfmsg:
                msgf = None
            failedRules.append((rule, spans, msgf))

    if failedRules:
        if not self.byrule:
            multi_rule_error(msg, cat, failedRules, self.showmsg,
                             predelim=self._first_error)
            self._first_error = False
        else:
            for rule, spans, msgf in failedRules:
                if rule.ident not in self.postFailedMessages:
                    self.postFailedMessages[rule.ident] = []
                self.postFailedMessages[rule.ident].append(
                    (msg, cat, ((rule, spans, msgf))))

        if self.mark:
            msg.flag.add(_flag_mark)

        if self.lokalize:
            repls = [_("@label", "Failed rules:")]
            for rule, hl, msgf in failedRules:
                repls.append(_("@item", "rule %(rule)s ==> %(msg)s",
                               rule=rule.displayName, msg=rule.hint))
                for part, item, spans, fval in hl:
                    repls.extend([u"↳ %s" % x[2]
                                  for x in spans if len(x) > 2])
            report_msg_to_lokalize(msg, cat, cjoin(repls, "\n"))
def apply_ediff(op):
    """
    Apply an embedded-difference (ediff) PO onto the catalogs it
    references; op is the parsed command-line options object.
    """
    # Read the ediff PO.
    dummy_stream_path = "<stdin>"
    if op.input:
        if not os.path.isfile(op.input):
            error(_("@info",
                    "Path '%(path)s' is not a file or does not exist.",
                    path=op.input))
        edfpath = op.input
        readfh = None
    else:
        edfpath = dummy_stream_path
        readfh = sys.stdin
    try:
        ecat = Catalog(edfpath, monitored=False, readfh=readfh)
    except:
        error(_("@info ediff is shorthand for \"embedded difference\"",
                "Error reading ediff '%(file)s'.", file=edfpath))

    # Split ediff by diffed catalog into original and new file paths,
    # header message, and ordinary messages.
    hmsgctxt = ecat.header.get_field_value(EDST.hmsgctxt_field)
    if hmsgctxt is None:
        error(_("@info",
                "Header field '%(field)s' is missing in the ediff.",
                field=EDST.hmsgctxt_field))
    edsplits = []
    cehmsg = None
    smsgid = u"\x00"
    ecat.add_last(MessageUnsafe(dict(msgctxt=hmsgctxt, msgid=smsgid))) # sentry
    for emsg in ecat:
        if emsg.msgctxt == hmsgctxt:
            if cehmsg:
                # Record previous section.
                edsplits.append((fpaths, cehmsg, cemsgs))
                if emsg.msgid == smsgid: # end sentry, avoid parsing below
                    break
            # Mine original and new file paths out of header.
            fpaths = []
            for fpath in emsg.msgid.split("\n")[:2]:
                # Strip leading "+ "/"- "
                fpath = fpath[2:]
                # Convert to platform path separators.
                fpath = re.sub(r"/+", os.path.sep, fpath)
                # Remove revision indicator.
                p = fpath.find(EDST.filerev_sep)
                if p >= 0:
                    fpath = fpath[:p]
                # Strip path and append directory as requested.
                if op.strip:
                    preflen = int(op.strip)
                    lst = fpath.split(os.path.sep, preflen)
                    if preflen + 1 == len(lst):
                        fpath = lst[preflen]
                    else:
                        fpath = os.path.basename(fpath)
                else:
                    fpath = os.path.basename(fpath)
                if op.directory and fpath:
                    fpath = os.path.join(op.directory, fpath)
                # All done.
                fpaths.append(fpath)
            cehmsg = emsg
            cemsgs = []
        else:
            cemsgs.append(emsg)

    # Prepare catalog for rejects and merges.
    rcat = Catalog("", create=True, monitored=False, wrapping=ecat.wrapping())
    init_ediff_header(rcat.header, hmsgctxt=hmsgctxt, extitle="rejects")

    # Apply diff to catalogs.
    for fpaths, ehmsg, emsgs in edsplits:
        # Open catalog for patching.
        fpath1, fpath2 = fpaths
        if fpath1:
            # Diff from an existing catalog, open it.
            if not os.path.isfile(fpath1):
                warning(_("@info",
                          "Path '%(path)s' is not a file or does not exist, "
                          "skipping it.", path=fpath1))
                continue
            try:
                cat = Catalog(fpath1)
            except:
                warning(_("@info",
                          "Error reading catalog '%(file)s', skipping it.",
                          file=fpath1))
                continue
        elif fpath2:
            # New catalog added in diff, create it (or open if it exists).
            try:
                mkdirpath(os.path.dirname(fpath2))
                cat = Catalog(fpath2, create=True)
                if cat.created():
                    cat.set_wrapping(ecat.wrapping())
            except:
                if os.path.isfile(fpath2):
                    # FIX: report the path actually being read here (fpath2);
                    # fpath1 is empty in this branch.
                    warning(_("@info",
                              "Error reading catalog '%(file)s', skipping it.",
                              file=fpath2))
                else:
                    warning(_("@info",
                              "Cannot create catalog '%(file)s', skipping it.",
                              file=fpath2))
                continue
        else:
            error(_("@info",
                    "Both catalogs in ediff indicated not to exist."))

        # Do not try to patch catalog with embedded differences
        # (i.e. previously patched using -e).
        if cat.header.get_field_value(EDST.hmsgctxt_field) is not None:
            warning(_("@info",
                      "Catalog '%(file)s' already contains "
                      "embedded differences, skipping it.",
                      file=cat.filename))
            continue

        # Do not try to patch catalog if the patch contains
        # unresolved split differences.
        if reduce(lambda r, x: r or _flag_ediff_to_new in x.flag,
                  emsgs, False):
            warning(_("@info",
                      "Patch for catalog '%(file)s' contains unresolved "
                      "split differences, skipping it.",
                      file=cat.filename))
            continue

        # Patch the catalog.
        rejected_ehmsg = patch_header(cat, ehmsg, ecat, op)
        rejected_emsgs_flags = patch_messages(cat, emsgs, ecat, op)
        any_rejected = rejected_ehmsg or rejected_emsgs_flags
        if fpath2 or any_rejected:
            created = cat.created()
            if cat.sync():
                if not created:
                    if any_rejected and op.embed:
                        report(_("@info:progress E is for \"with embedding\"",
                                 "Partially patched (E): %(file)s",
                                 file=cat.filename))
                    elif any_rejected:
                        report(_("@info:progress",
                                 "Partially patched: %(file)s",
                                 file=cat.filename))
                    elif op.embed:
                        report(_("@info:progress E is for \"with embedding\"",
                                 "Patched (E): %(file)s",
                                 file=cat.filename))
                    else:
                        report(_("@info:progress",
                                 "Patched: %(file)s",
                                 file=cat.filename))
                else:
                    if op.embed:
                        report(_("@info:progress E is for \"with embedding\"",
                                 "Created (E): %(file)s",
                                 file=cat.filename))
                    else:
                        report(_("@info:progress",
                                 "Created: %(file)s",
                                 file=cat.filename))
            else:
                pass
                #report("unchanged: %s" % cat.filename)
        else:
            # Diff removed the catalog and nothing was rejected.
            os.unlink(fpath1)
            report(_("@info:progress", "Removed: %(file)s", file=fpath1))

        # If there were any rejects and reembedding is not in effect,
        # record the necessary to present them.
        if any_rejected and not op.embed:
            if not rejected_ehmsg:
                # Clean header diff.
                ehmsg.manual_comment = ehmsg.manual_comment[:1]
                ehmsg.msgstr[0] = u""
            rcat.add_last(ehmsg)
            for emsg, flag in rejected_emsgs_flags:
                # Reembed to avoid any conflicts.
                msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg)
                emsg = msg_ediff(msg1_s, msg2_s, emsg=msg2_s,
                                 ecat=rcat, enoctxt=hmsgctxt)
                if flag:
                    emsg.flag.add(flag)
                rcat.add_last(emsg)

    # If there were any rejects, write them out.
    if len(rcat) > 0:
        # Construct paths for embedded diffs of rejects.
        rsuff = "rej"
        if ecat.filename != dummy_stream_path:
            rpath = ecat.filename
            p = rpath.rfind(".")
            if p < 0:
                p = len(rpath)
            rpath = rpath[:p] + (".%s" % rsuff) + rpath[p:]
        else:
            rpath = "stdin.%s.po" % rsuff
        rcat.filename = rpath
        rcat.sync(force=True, noobsend=True)
        report(_("@info:progress file to which rejected parts of the patch "
                 "have been written to",
                 "*** Rejects: %(file)s", file=rcat.filename))
def msg_apply_diff(cat, emsg, ecat, pmsgkeys, striplets):
    """
    Work out how one ediff message applies to the catalog.

    Returns a list of patch specs:
    (emsg, flag, type, position, msg1, msg2, msg1_s, msg2_s).
    pmsgkeys tracks keys of catalog messages already claimed by earlier
    patches; striplets is either the splitting-triplet list or a callable
    producing it lazily.
    """
    msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(emsg)

    # Try to select existing message from the original messages.
    # Order is important, should try first new, then old
    # (e.g. if an old fuzzy was resolved to new after diff was made).
    msg = None
    if msg2 and msg2 in cat:
        msg = cat[msg2]
    elif msg1 and msg1 in cat:
        msg = cat[msg1]

    patch_specs = []

    # Try to apply the patch.
    if msg_patchable(msg, msg1, msg2):
        # Patch can be directly applied.
        if msg1 and msg2:
            if msg.key not in pmsgkeys:
                typ = _pt_merge
                pos = cat.find(msg)
                pmsgkeys.add(msg.key)
            else:
                # Message already claimed: insert instead of merging.
                typ = _pt_insert
                pos, weight = cat.insertion_inquiry(msg2)
        elif msg2: # patch adds a message
            if msg:
                typ = _pt_merge
                pos = cat.find(msg)
                pmsgkeys.add(msg.key)
            else:
                typ = _pt_insert
                pos, weight = cat.insertion_inquiry(msg2)
        elif msg1: # patch removes a message
            if msg:
                typ = _pt_remove
                pos = cat.find(msg)
                pmsgkeys.add(msg.key)
            else:
                typ = _pt_remove
                pos = None # no position to remove from
        else:
            # Cannot happen.
            error_on_msg(
                _("@info",
                  "Neither the old nor the new message "
                  "in the diff is indicated to exist."), emsg, ecat)
        patch_specs.append(
            (emsg, _flag_ediff, typ, pos, msg1, msg2, msg1_s, msg2_s))
    else:
        # Patch cannot be applied directly,
        # try to split into old-to-current and current-to-new diffs.
        split_found = False
        if callable(striplets):
            striplets = striplets() # delayed creation of splitting triplets
        for i in range(len(striplets)):
            m1_t, m1_ts, m2_t, m2_ts, m_t, m_ts1, m_ts2 = striplets[i]
            if msg1.inv == m1_t.inv and msg2.inv == m2_t.inv:
                striplets.pop(i) # remove to not slow further searches
                split_found = True
                break
        if split_found:
            # Construct new corresponding diffs.
            em_1c = msg_ediff(m1_ts, m_ts1, emsg=MessageUnsafe(m_t))
            em_c2 = msg_ediff(m_ts2, m2_ts, emsg=MessageUnsafe(m2_t))
            # Current-to-new can be merged or inserted,
            # and old-to-current is then inserted just before it.
            if m_t.key not in pmsgkeys:
                typ = _pt_merge
                pos = cat.find(m_t)
                pmsgkeys.add(m_t.key)
            else:
                typ = _pt_insert
                pos, weight = cat.insertion_inquiry(m2_t)
            # Order of adding patch specs here important for rejects file.
            patch_specs.append((em_1c, _flag_ediff_to_cur, _pt_insert, pos,
                                m1_t, m_t, m1_ts, m_ts1))
            patch_specs.append(
                (em_c2, _flag_ediff_to_new, typ, pos,
                 m_t, m2_t, m_ts2, m2_ts))

    # The patch is totally rejected.
    # Will be inserted if reembedding requested, so compute insertion.
    if not patch_specs:
        typ = _pt_insert
        if msg2 is not None:
            pos, weight = cat.insertion_inquiry(msg2)
        else:
            pos = len(cat)
        patch_specs.append(
            (emsg, _flag_ediff_no_match, typ, pos,
             msg1, msg2, msg1_s, msg2_s))

    return patch_specs
def patch_messages(cat, emsgs, ecat, options):
    """
    Apply the ediff messages emsgs onto the catalog.

    Returns a list of (emsg, flag) pairs for the rejected (or split)
    patches, for later presentation.
    """
    # It may happen that a single message from original catalog
    # is paired with more than one from the diff
    # (e.g. single old translated message going into two new fuzzy).
    # Therefore paired messages must be tracked, to know if patched
    # message can be merged into the existing, or it must be inserted.
    pmsgkeys = set()

    # Triplets for splitting directly unapplicable patches into two.
    # Delay building of triplets until needed for the first time.
    striplets_pack = [None]
    def striplets():
        if striplets_pack[0] is None:
            striplets_pack[0] = build_splitting_triplets(emsgs, cat, options)
        return striplets_pack[0]

    # Check whether diffs apply, and where and how if they do.
    rejected_emsgs_flags = []
    patch_specs = []
    for emsg in emsgs:
        pspecs = msg_apply_diff(cat, emsg, ecat, pmsgkeys, striplets)
        for pspec in pspecs:
            emsg_m, flag = pspec[:2]
            if flag == _flag_ediff or options.embed:
                patch_specs.append(pspec)
            if flag != _flag_ediff:
                rejected_emsgs_flags.append((emsg_m, flag))

    # Sort accepted patches by position of application.
    patch_specs.sort(key=lambda x: x[3])

    # Add accepted patches to catalog.
    # incpos compensates positions for insertions/removals made so far.
    incpos = 0
    for emsg, flag, typ, pos, msg1, msg2, msg1_s, msg2_s in patch_specs:
        if pos is not None:
            pos += incpos

        if options.embed:
            # Embedded diff may conflict one of the messages in catalog.
            # Make a new diff of special messages,
            # and embed them either into existing message in catalog,
            # or into new message.
            if typ == _pt_merge:
                tmsg = cat[pos]
                tpos = pos
            else:
                tmsg = MessageUnsafe(msg2 or {})
                tpos = None
            emsg = msg_ediff(msg1_s, msg2_s, emsg=tmsg, ecat=cat, eokpos=tpos)

        if 0:
            pass
        elif typ == _pt_merge:
            if not options.embed:
                cat[pos].set_inv(msg2)
            else:
                # Diff already embedded in place above; only flag it.
                cat[pos].flag.add(flag)
        elif typ == _pt_insert:
            if not options.embed:
                cat.add(Message(msg2), pos)
            else:
                cat.add(Message(emsg), pos)
                cat[pos].flag.add(flag)
            incpos += 1
        elif typ == _pt_remove:
            if pos is None:
                continue
            if not options.embed:
                cat.remove(pos)
                incpos -= 1
            else:
                cat[pos].flag.add(flag)
        else:
            error_on_msg(_("@info",
                           "Unknown patch type %(type)s.", type=typ),
                         emsg, ecat)

    return rejected_emsgs_flags