def resolver_helper (msgstr, msg, cat, modtext, spanrep):

    errspans = []
    tsegs = []

    if (   mkeyw is None
        or (not invmkeyw and mkeyw.intersection(cat.markup() or set()))
        or (invmkeyw and not mkeyw.intersection(cat.markup() or set()))
    ):
        rsplit = split_by_uiref(msgstr, msg, cat, errspans)
        for ptext, uiref, start, end in rsplit:
            tsegs.append(ptext)
            if uiref is not None:
                uiref_res, errmsgs = resolve_single_uiref(uiref, msg, cat,
                                                          resolver_helper)
                tsegs.append(uiref_res)
                errspans.extend([(start, end, x) for x in errmsgs])
                if not spanrep and not quiet:
                    for errmsg in errmsgs:
                        warning_on_msg(errmsg, msg, cat)
    else:
        tsegs.append(msgstr)

    if modtext: # F3C hook
        return "".join(tsegs)
    elif spanrep: # V3C hook
        return errspans
    else: # S3C hook
        return len(errspans)
def process(self, msg, cat):

    for i in range(len(msg.msgstr)):
        msg.msgstr[i], nresolved, valid = \
            resolve_alternatives(msg.msgstr[i], self.select, self.total,
                                 srcname=cat.filename)
        if valid:
            self.nresolved += nresolved
        else:
            warning_on_msg(_("@info",
                             "Invalid alternatives directive "
                             "in translation."),
                           msg, cat)
def process(self, msg, cat):

    for i in range(len(msg.msgstr)):
        msg.msgstr[i], resolved, unknown = \
            resolve_entities(msg.msgstr[i],
                             self.entities, self.ignored_entities,
                             cat.filename)
        self.nresolved += len(resolved)
        if unknown:
            warning_on_msg(_("@info",
                             "Unknown entities in translation: "
                             "%(entlist)s.",
                             entlist=format_item_list(unknown)),
                           msg, cat)
def _naked_latin_w(msgstr, msg, cat, origui=False, sideeffect=False):

    # Avoid meta-messages.
    if (   msg.msgctxt in ("EMAIL OF TRANSLATORS", )
        or (    cat.name.endswith(".desktop")
            and msg.msgctxt in ("Keywords", "Query"))
    ):
        if sideeffect:
            return 0
        else:
            return []

    # Avoid specially tagged messages.
    for auto_cmnt in msg.auto_comment:
        if _auto_cmnt_tag_rx.search(auto_cmnt):
            if sideeffect:
                return 0
            else:
                return []

    # Eliminate all no-check segments.
    stripped_msgstr = msgstr
    if origui: # must come before tag removal
        for rx in _no_check_lat_origui_rxs:
            stripped_msgstr = rx.sub("", stripped_msgstr)
    for rx in _no_check_lat_rxs:
        stripped_msgstr = rx.sub("", stripped_msgstr)

    matches = list(_naked_latin_rx.finditer(stripped_msgstr))
    if sideeffect:
        # Report if any Latin text remained in stripped msgstr.
        for m in matches:
            warning_on_msg(_("@info",
                             "Naked Latin segment '%(snippet)s'.",
                             snippet=m.group(0)),
                           msg, cat)
        return len(matches)
    else:
        # Collect and adapt offending spans.
        spans = [m.span() for m in matches]
        spans = adapt_spans(msgstr, stripped_msgstr, spans, merge=False)
        return spans
def _literals_spec(msg, cat):

    fname = "literal-segment"
    rx_strs = manc_parse_field_values(msg, fname)

    # Compile regexes.
    # Empty regex indicates not to do any heuristic removal.
    rxs = []
    heuristic = True
    for rx_str in rx_strs:
        if rx_str:
            try:
                rxs.append(re.compile(rx_str, re.U | re.S))
            except:
                warning_on_msg(_("@info",
                                 "Field %(field)s states "
                                 "malformed regex '%(re)s'.",
                                 field=fname, re=rx_str),
                               msg, cat)
        else:
            heuristic = False

    return [], rxs, heuristic
def rewrite_inverse(msg, cat):
    """
    Rewrite a message by replacing all its parts with those of another
    message which has the same C{msgstr[0]} [type F4A hook].

    Translator comments may issue C{rewrite-inverse} directives to replace
    all message parts with those from another message having the same
    C{msgstr[0]} field. The argument to the directive is a regular expression
    search pattern on C{msgid} and C{msgctxt} (leading and trailing
    whitespace get stripped), which is used to select the particular message
    if more than one other message has the same C{msgstr[0]}. Examples::

        # rewrite-inverse:
        # rewrite-inverse: Foo

    If the pattern does not match or it matches more than one other message,
    the current message is not touched; the same holds if the pattern is
    left empty and there is more than one other message. The search pattern
    is applied to C{msgctxt} and C{msgid} in turn, and the message is matched
    if either matches. The search pattern is case-sensitive.

    If more than one C{rewrite-inverse} directive is seen, or the search
    pattern is not valid, a warning on the message is issued and the current
    message is not touched.

    The hook is executed again on the resulting message, in case the new
    translator comments contain another C{rewrite-inverse} directive.

    @return: number of errors
    """

    # Collect and compile regular expressions.
    fname = "rewrite-inverse"
    rwspecs = manc_parse_field_values(msg, fname)

    if not rwspecs:
        return 0

    if len(rwspecs) > 1:
        warning_on_msg(_("@info",
                         "More than one inverse rewrite directive "
                         "encountered."),
                       msg, cat)
        return 1

    srch = rwspecs[0]
    try:
        rx = re.compile(srch, re.U)
    except:
        warning_on_msg(_("@info",
                         "Invalid search pattern '%(pattern)s' in "
                         "inverse rewrite directive.",
                         pattern=srch),
                       msg, cat)
        return 1

    msgs = cat.select_by_msgstr(msg.msgstr[0], lazy=True)
    msgs = [x for x in msgs if x.key != msg.key] # remove current
    if not msgs:
        warning_on_msg(_("@info",
                         "There are no other messages with the same "
                         "translation, needed by inverse rewrite directive."),
                       msg, cat)
        return 1

    match = lambda x: (   (x.msgctxt is not None and rx.search(x.msgctxt))
                       or rx.search(x.msgid))
    sel_msgs = [x for x in msgs if match(x)] # remove non-matched
    if not sel_msgs:
        warning_on_msg(_("@info",
                         "Inverse rewrite directive matches none of "
                         "the other messages with the same translation."),
                       msg, cat)
        return 1

    if len(sel_msgs) > 1:
        warning_on_msg(_("@info",
                         "Inverse rewrite directive matches more than "
                         "one other message with the same translation."),
                       msg, cat)
        return 1

    # Copy all parts of the other message.
    omsg = sel_msgs[0]
    msg.msgid = omsg.msgid
    if msg.msgid_plural is not None and omsg.msgid_plural is not None:
        msg.msgid_plural = omsg.msgid_plural

    # Copy comments and recurse.
    msg.set(omsg)
    nerrors = rewrite_inverse(msg, cat)

    return nerrors
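# Illustrative sketch (not part of the hook above; the messages shown are
# hypothetical). Suppose a catalog holds two messages that share msgstr[0]:
# one with msgid "Open" and msgctxt "verb", and another with msgid "Opening"
# and msgctxt "noun". Adding the translator comment
#
#     # rewrite-inverse: noun
#
# to the first message and calling rewrite_inverse(msg, cat) replaces its
# parts with those of the second message, since the pattern "noun" matches
# only that message's msgctxt; the hook then runs once more on the result,
# in case the copied comments carry another directive. The return value is
# the number of errors (0 here).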
def rewrite_msgid(msg, cat):
    """
    Rewrite parts of C{msgid} based on translator comments [type F4A hook].

    Translator comments may issue C{rewrite-msgid} directives to modify parts
    of the C{msgid} (as well as C{msgid_plural}) field by applying a search
    regular expression and a replace pattern. The search and replace patterns
    are wrapped and separated by a character used consistently throughout the
    directive, such as a slash. Examples::

        # rewrite-msgid: /foo/bar/
        # rewrite-msgid: /foo (\\w+) fam/bar \\1 bam/
        # rewrite-msgid: :foo/bar:foo/bam:

    If a search pattern is not valid, a warning on the message is issued.
    The search pattern is case-sensitive.

    @return: number of errors
    """

    nerrors = 0

    # Collect and compile regular expressions.
    fname = "rewrite-msgid"
    rwspecs = manc_parse_field_values(msg, fname)
    rwrxs = []
    for rwspec in rwspecs:
        sep = rwspec[0:1]
        if not sep:
            warning_on_msg(_("@info",
                             "No patterns in rewrite directive."),
                           msg, cat)
            nerrors += 1
            continue
        lst = rwspec.split(sep)
        if len(lst) != 4 or lst[0] or lst[3]:
            warning_on_msg(_("@info",
                             "Wrongly separated patterns in "
                             "rewrite directive '%(dir)s'.",
                             dir=rwspec),
                           msg, cat)
            nerrors += 1
            continue
        srch, repl = lst[1], lst[2]
        try:
            rx = re.compile(srch, re.U)
        except:
            warning_on_msg(_("@info",
                             "Invalid search pattern in "
                             "rewrite directive '%(dir)s'.",
                             dir=rwspec),
                           msg, cat)
            nerrors += 1
            continue
        rwrxs.append((rx, repl, rwspec))

    for rx, repl, rwspec in rwrxs:
        try:
            msg.msgid = rx.sub(repl, msg.msgid)
            if msg.msgid_plural is not None:
                msg.msgid_plural = rx.sub(repl, msg.msgid_plural)
        except:
            warning_on_msg(_("@info",
                             "Error in application of "
                             "rewrite directive '%(dir)s'.",
                             dir=rwspec),
                           msg, cat)
            nerrors += 1

    return nerrors
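# Illustrative sketch (hypothetical message fields, not part of the hook
# above). With a translator comment
#
#     # rewrite-msgid: /Foo/Bar/
#
# on a message whose msgid is "Save in Foo format", calling
#
#     nerrors = rewrite_msgid(msg, cat)
#
# leaves msgid as "Save in Bar format" and returns 0. A malformed directive
# (e.g. "# rewrite-msgid: /foo/bar", missing the closing separator) is
# reported through warning_on_msg() and counted in the returned nerrors.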
def hybdl(path, path0, accnohyb=False):

    cat = Catalog(path)
    cat0 = Catalog(path0, monitored=False)

    nhybridized = 0
    nstopped = 0
    for msg in cat:

        if "no-hybdl" in manc_parse_flag_list(msg, "|"):
            continue

        # Unembed diff if message was diffed for review.
        # Replace ediff with manual review flag.
        diffed = False
        for flag in msg.flag:
            if flag.startswith("ediff"):
                msg.flag.remove(flag)
                diffed = True
        if diffed:
            msg_ediff_to_new(msg, msg)
            msg.flag.add(u"reviewed")

        # Fetch original message.
        msg0 = cat0.get(msg)
        if msg0 is None:
            warning_on_msg(_("@info",
                             "Message does not exist in the original catalog."),
                           msg, cat)
            nstopped += 1
            continue
        if len(msg.msgstr) != len(msg0.msgstr):
            warning_on_msg(_("@info",
                             "Number of translations is not the same as in "
                             "the original message."),
                           msg, cat)
            nstopped += 1
            continue

        if msg.msgstr == msg0.msgstr:
            # No changes, nothing new to hybridize.
            continue

        # Hybridize translation.
        textsh = []
        textshinv = []
        for text0, text in zip(msg0.msgstr, msg.msgstr):
            texth = tohi(text0, text, parthyb=True)
            textsh.append(texth)
            if not accnohyb:
                texthinv = tohi(text, text0, parthyb=True)
                textshinv.append(texthinv)
        if accnohyb or textsh == textshinv:
            for i, texth in zip(range(len(msg.msgstr)), textsh):
                msg.msgstr[i] = texth
            nhybridized += 1
        else:
            nstopped += 1
            msgh = MessageUnsafe(msg)
            msgh.msgstr = textsh
            msghinv = MessageUnsafe(msg)
            msghinv.msgstr = textshinv
            msg_ediff(msghinv, msgh, emsg=msgh, colorize=True)
            report_msg_content(msgh, cat, delim=("-" * 20))

    if nstopped == 0:
        if cat.sync():
            report("! %s (%d)" % (path, nhybridized))
    else:
        warning(n_("@info",
                   "%(num)d message in '%(file)s' cannot be "
                   "cleanly hybridized.",
                   "%(num)d messages in '%(file)s' cannot be "
                   "cleanly hybridized.",
                   num=nstopped, file=path))
        nhybridized = 0

    return nhybridized
def unembed_ediff(path, all=False, old=False):

    try:
        cat = Catalog(path)
    except:
        warning(_("@info",
                  "Error reading catalog '%(file)s', skipping it.",
                  file=path))
        return

    hmsgctxt = cat.header.get_field_value(EDST.hmsgctxt_field)
    if hmsgctxt is not None:
        cat.header.remove_field(EDST.hmsgctxt_field)

    uehmsg = None
    unembedded = {}
    for msg in cat:
        ediff_flag = None
        for flag in _flags_all:
            if flag in msg.flag:
                ediff_flag = flag
                msg.flag.remove(flag)
        if not ediff_flag and not all:
            continue
        if ediff_flag in (_flag_ediff_no_match, _flag_ediff_to_new):
            # Throw away fully rejected embeddings, i.e. reject the patch.
            # For split-difference embeddings, throw away the current-to-new;
            # this effectively rejects the patch, which is the safest thing to do.
            cat.remove_on_sync(msg)
        elif hmsgctxt is not None and msg.msgctxt == hmsgctxt:
            if uehmsg:
                warning_on_msg(_("@info",
                                 "Unembedding results in duplicate header, "
                                 "previous header at %(line)d(#%(entry)d); "
                                 "skipping it.",
                                 line=uehmsg.refline, entry=uehmsg.refentry),
                               msg, cat)
                return
            msg_ediff_to_x = not old and msg_ediff_to_new or msg_ediff_to_old
            hmsg = msg_ediff_to_x(clear_header_metadata(msg))
            if hmsg.msgstr and hmsg.msgstr[0]:
                cat.header = Header(hmsg)
            cat.remove_on_sync(msg)
            uehmsg = msg
        else:
            msg1, msg2, msg1_s, msg2_s = resolve_diff_pair(msg)
            tmsg = (not old and (msg2, ) or (msg1, ))[0]
            if tmsg is not None:
                if tmsg.key in unembedded:
                    msg_p = unembedded[tmsg.key]
                    warning_on_msg(_("@info",
                                     "Unembedding results in "
                                     "duplicate message, previous message "
                                     "at %(line)d(#%(entry)d); skipping it.",
                                     line=msg_p.refline, entry=msg_p.refentry),
                                   msg, cat)
                    return
                msg.set(Message(msg2))
                unembedded[tmsg.key] = msg
            else:
                cat.remove_on_sync(msg)

    if cat.sync():
        report(_("@info:progress",
                 "Unembedded: %(file)s", file=cat.filename))
class Sieve (object):

    def __init__ (self, params):

        self.caller_sync = False
        self.caller_monitored = False

        self.propcons = None
        if params.propcons:
            self.propcons = self._read_propcons(params.propcons)

        self.p = params

        if not params.pmhead:
            raise SieveError(_("@info",
                               "Prefix which starts property map entries "
                               "in comments cannot be empty."))
        if not params.sdhead:
            raise SieveError(_("@info",
                               "Prefix which starts syntagma derivator entries "
                               "in comments cannot be empty."))

        # Collected entries.
        # Each element is a tuple of the form:
        # (ekeys, props, psep, kvsep, msg, cat)
        self.entries = []

        # Syntagma derivator, for synder entries.
        self.synder = Synder()
        self.sdord = 0


    def process (self, msg, cat):

        if not msg.translated or msg.obsolete:
            return
        if msg.msgid_plural is not None:
            return

        # Parse property map entries from the message.
        psep, kvsep = None, None
        ekeys = set()
        props = {}
        for i in range(len(msg.manual_comment)):
            ind = i + 1
            manc = (msg.manual_comment[i]).strip()
            if manc.startswith(self.p.pmhead):
                # Parse and check consistency of separators.
                espec = manc[len(self.p.pmhead):].lstrip()
                lkvsep, lpsep = espec[:2]
                if lkvsep.isalnum() or lpsep.isalnum():
                    warning_on_msg(_("@info",
                                     "An alphanumeric separator is used for "
                                     "property map entry in comment "
                                     "no. %(ord)d.", ord=ind),
                                   msg, cat)
                    return
                if not psep:
                    psep, kvsep = lpsep, lkvsep
                elif (psep, kvsep) != (lpsep, lkvsep):
                    warning_on_msg(_("@info",
                                     "Inconsistent separators for "
                                     "continued property map entry in comment "
                                     "no. %(ord)d.", ord=ind),
                                   msg, cat)
                    return

                # Remove leading and trailing separators.
                respec = espec[2:]
                if respec.endswith(psep + psep):
                    respec = respec[:-2]
                elif respec.endswith(psep):
                    respec = respec[:-1]
                else:
                    warning_on_msg(_("@info",
                                     "Missing terminating separator for "
                                     "property map entry in comment "
                                     "no. %(ord)d.", ord=ind),
                                   msg, cat)
                    return

                # Parse entry keys and key-value pairs.
                for elspec in respec.split(psep):
                    if kvsep in elspec:
                        pkey, pval = elspec.split(kvsep, 1)
                        props[pkey] = pval
                    else:
                        ekey = elspec
                        if not self.p.extrakeys:
                            warning_on_msg(_("@info",
                                             "Additional entry key '%(key)s' "
                                             "is defined but not allowed for "
                                             "property map entry in comment "
                                             "no. %(ord)d.",
                                             key=ekey, ord=ind),
                                           msg, cat)
                            return
                        ekeys.add(ekey)

            elif manc.startswith(self.p.sdhead):
                sddef = manc[len(self.p.sdhead):].lstrip()
                sdkey = str(self.sdord)
                sdexpr = sdkey + ":" + sddef
                if self.p.derivs:
                    sdexpr = ">" + self.p.derivs + "\n" + sdexpr
                try:
                    self.synder.import_string(sdexpr)
                    cprops = self.synder.props(sdkey)
                except Exception, e:
                    errmsg = str_to_unicode(str(e))
                    warning_on_msg(_("@info",
                                     "Invalid derivation '%(deriv)s':\n"
                                     "%(msg)s", deriv=sddef, msg=errmsg),
                                   msg, cat)
                    return

                jumble = "".join(["".join(x) for x in cprops.items()])
                if not psep:
                    psep = self._pick_sep(jumble, u"/|¦")
                    kvsep = self._pick_sep(jumble, u"=:→")
                    if not psep or not kvsep:
                        warning_on_msg(_("@info",
                                         "No known separators are applicable "
                                         "to keys and values derived from "
                                         "'%(deriv)s'.", deriv=sddef),
                                       msg, cat)
                        return
                else:
                    if psep in jumble or kvsep in jumble:
                        warning_on_msg(_("@info",
                                         "Previously selected separators "
                                         "are not applicable to "
                                         "keys and values derived from "
                                         "'%(deriv)s'.", deriv=sddef),
                                       msg, cat)
                        return

                props.update(cprops)

        if not props:
            if ekeys:
                warning_on_msg(_("@info",
                                 "Some additional entry keys "
                                 "are defined for property map entry, "
                                 "but there are no properties."),
                               msg, cat)
            return
        props = sorted(props.items()) # no need for dictionary any more

        # Add default keys.
        ekeys.add(msg.msgid)
        ekeys.add(msg.msgstr[0])

        # Validate entry if requested.
        if self.propcons:
            errs = self._validate_props(props, msg, cat, self.propcons)
            if errs:
                problems = cjoin([" " + x for x in errs], "\n")
                warning_on_msg(_("@info",
                                 "Property map entry fails validation:\n"
                                 "%(msgs)s", msgs=problems),
                               msg, cat)
                return

        # Entry parsed.
        ekeys = sorted(ekeys)
        props = sorted(props)
        self.entries.append((ekeys, props, psep, kvsep, msg, cat))
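    # Illustrative sketch (hypothetical comment text; the actual prefix is the
    # pmhead sieve parameter, shown here as "pmap:"). A property map entry in
    # a translator comment such as
    #
    #     # pmap: =/gender=f/number=s/
    #
    # is parsed by process() above as follows: the first two characters after
    # the prefix give the key-value separator "=" and the property separator
    # "/", the trailing "/" (or "//") is stripped, and the remaining segments
    # split on "/" yield the properties gender=f and number=s; a segment
    # without "=" would be taken as an additional entry key (allowed only
    # with the extrakeys parameter).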
    def finalize (self):

        # Check cross-entry validity, select valid.
        msgs_by_seen_msgstr = {}
        unique_entries = []
        for entry in self.entries:
            d1, props, d3, d4, msg, cat = entry
            msgstr = msg.msgstr[0]
            if msgstr not in msgs_by_seen_msgstr:
                msgs_by_seen_msgstr[msgstr] = []
            else:
                for d1, d2, oprops in msgs_by_seen_msgstr[msgstr]:
                    if props == oprops:
                        props = None
                        break
            if props:
                unique_entries.append(entry)
                msgs_by_seen_msgstr[msgstr].append((msg, cat, props))
        good_entries = []
        for ekeys, props, psep, kvsep, msg, cat in unique_entries:
            eq_msgstr_set = msgs_by_seen_msgstr.get(msg.msgstr[0])
            if eq_msgstr_set is not None:
                if len(eq_msgstr_set) > 1:
                    cmsgcats = msgs_by_seen_msgstr.pop(msg.msgstr[0])
                    msg0, cat0, d3 = cmsgcats[0]
                    warning_on_msg(_("@info split to link below",
                                     "Property map entries removed due "
                                     "to translation conflict with..."),
                                   msg0, cat0)
                    for msg, cat, d3 in cmsgcats[1:]:
                        warning_on_msg(_("@info continuation from above",
                                         "...this message."),
                                       msg, cat)
                else:
                    good_entries.append((ekeys, props, psep, kvsep))

        # If output file has not been given, only validation was expected.
        if not self.p.outfile:
            return

        # Serialize entries.
        good_entries.sort(key=lambda x: x[0])
        lines = []
        for ekeys, props, psep, kvsep in good_entries:
            # Do Unicode, locale-unaware sorting,
            # for equal results over different systems;
            # they are not to be read by humans anyway.
            propstr = psep.join([kvsep.join(x) for x in sorted(props)])
            ekeystr = psep.join(sorted(ekeys))
            estr = kvsep + psep + ekeystr + psep + propstr + psep + psep
            lines.append(estr)

        # Write out the property map.
        lines.append("")
        fstr = "\n".join(lines)
        fstr = fstr.encode("UTF-8")
        fh = open(self.p.outfile, "w")
        fh.write(fstr)
        fh.close()

        msg = n_("@info:progress",
                 "Collected %(num)d entry for the property map.",
                 "Collected %(num)d entries for the property map.",
                 num=len(good_entries))
        report("===== " + msg)
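    # Illustrative sketch (hypothetical entry values). With psep "/" and
    # kvsep "=", an entry whose keys are msgid "Open" and msgstr "Otvori"
    # and whose properties are gender=m and number=s is serialized by
    # finalize() above as the line
    #
    #     =/Open/Otvori/gender=m/number=s//
    #
    # i.e. the key-value separator, then the separator-joined entry keys,
    # then the separator-joined key=value properties, terminated by a
    # doubled property separator.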
def split_by_uiref (text, msg, cat, errspans):

    rsplit = []

    ltext = len(text)
    p = 0
    while True:
        mt = uiref_start_tag_rx.search(text, p)
        if mt:
            pt = mt.start()
        else:
            pt = ltext
        mh = uiref_start_head_rx.search(text, p)
        if mh:
            ph = mh.start()
        else:
            ph = ltext
        if pt < ph:
            # Tagged UI reference.
            tag = mt.group(1)
            m = uiref_extract_tag_rx[tag].search(text, pt)
            if not m:
                errmsg = _("@info \"tag\" is a tag in HTML/XML context",
                           "Non-terminated UI reference by tag '%(tag)s'.",
                           tag=tag)
                errspans.append(mt.span() + (errmsg,))
                if not spanrep and not quiet:
                    warning_on_msg(errmsg, msg, cat)
                break
            uirefpath = m.group(2)
            pe = m.end() - len(m.group(3))
            ps = pe - len(uirefpath)
        elif ph < pt:
            # Headed UI reference.
            head = mh.group(1)
            m = uiref_extract_head_rx[head].search(text, ph)
            if not m:
                errmsg = _("@info \"head\" is the leading part of "
                           "UI reference, e.g. '~%' in '~%/Save All/'",
                           "Non-terminated UI reference by "
                           "head '%(head)s'.",
                           head=head)
                errspans.append(mh.span() + (errmsg,))
                if not spanrep and not quiet:
                    warning_on_msg(errmsg, msg, cat)
                break
            uirefpath = m.group(2)
            ps, pe = m.span()
        else:
            # Both positions equal, meaning end of text.
            break

        ptext_uiref = _split_uirefpath(text[p:ps], uirefpath, uipathseps)
        for ptext, uiref in ptext_uiref:
            rsplit.append((ptext, uiref, ps, pe))
        p = pe

    # Trailing segment (or everything after an error).
    rsplit.append((text[p:], None, -1, -1))

    return rsplit