def do_one_page_verb(page, index, text):
    """Rewrite the vn= parameter of every ar-conj template on a page.

    Each comma-separated verbal noun in vn= is passed through
    ``remove_i3rab`` and the results are joined back with ASCII commas.
    A trailing "?" on the parameter (marking the verbal nouns as uncertain)
    is set aside during processing and restored afterwards.

    Args:
        page: Page object; only ``page.title()`` is used.
        index: Page index, used only in log messages.
        text: Parsed page text exposing ``filter_templates()``; its templates
            are modified in place.

    Returns:
        A ``(text, changelog)`` tuple, where ``changelog`` lists the
        identifiers of the verbs whose vn= value was actually changed.
    """
    pagename = page.title()
    verbcount = 0
    verbids = []
    for template in text.filter_templates():
        if template.name == "ar-conj":
            verbcount += 1
            origvalue = getparam(template, "vn")
            uncertain = origvalue.endswith("?")
            vnvalue = origvalue
            if uncertain:
                vnvalue = origvalue[:-1]
                msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
                    index, pagename))
            if not vnvalue:
                continue
            form = getparam(template, "1")
            verbid = "#%s form %s" % (verbcount, form)
            # Form I verbs are further identified by the values of 2= and 3=.
            if re.match("^[1I](-|$)", form):
                verbid += " (%s,%s)" % (getparam(template, "2"),
                                        getparam(template, "3"))
            # Accept both the ASCII and the Arabic comma as separators.
            no_i3rab_vns = [remove_i3rab(pagename, index, verbid, vn)
                            for vn in re.split(u"[,،]", vnvalue)]
            newvn = ",".join(no_i3rab_vns)
            if uncertain:
                newvn += "?"
            # Compare against the untouched parameter value. Comparing with
            # the "?"-stripped value would flag every uncertain verb as
            # changed even when nothing was modified.
            if newvn != origvalue:
                msg("Page %s %s: Verb %s, replacing %s with %s" % (
                    index, pagename, verbid, origvalue, newvn))
                addparam(template, "vn", newvn)
                verbids.append(verbid)
    return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
        ', '.join(verbids))
def canon_param(pagetitle, index, template, param, paramtr,
                include_tempname_in_changelog=False):
    """Canonicalize one Arabic parameter and its transliteration parameter.

    ``param`` is either a single parameter name, used both for reading and
    writing, or a two-element list ``[fromparam, toparam]``. The special
    source name "page title" reads the Arabic text from ``pagetitle``
    instead of from the template.

    Returns False when there is no Arabic text to work on; otherwise returns
    the ``actions`` value produced by ``do_canon_param``. The template is
    modified in place.
    """
    if isinstance(param, list):
        source_name, dest_name = param
    else:
        source_name = dest_name = param

    if source_name == "page title":
        arabic_text = pagetitle
    else:
        arabic_text = getparam(template, source_name)
    latin_text = getparam(template, paramtr)
    if not arabic_text:
        return False

    new_arabic, new_latin, actions = do_canon_param(
        pagetitle, index, template, source_name, dest_name, paramtr,
        arabic_text, latin_text, include_tempname_in_changelog)

    # Snapshot the template before touching it, for the log message below.
    snapshot = "%s" % unicode(template)
    if new_arabic:
        addparam(template, dest_name, new_arabic)
    # A result equal to True means "drop the transliteration parameter";
    # any other truthy result is the replacement transliteration.
    if new_latin == True:
        template.remove(paramtr)
    elif new_latin:
        addparam(template, paramtr, new_latin)

    if new_arabic or new_latin:
        msg("Page %s %s: Replaced %s with %s" % (
            index, pagetitle, snapshot, unicode(template)))
    return actions
def canon_param(pagetitle, index, template, lang, param, paramtr,
                translit_module):
    """Canonicalize one foreign-script parameter and its transliteration.

    ``param`` is either a single parameter name, used both for reading and
    writing, or a two-element list ``[fromparam, toparam]``. The special
    source name "page title" reads the foreign text from ``pagetitle``
    instead of from the template.

    Returns False when there is no foreign text to work on; otherwise
    returns the ``actions`` value produced by ``do_canon_param``. The
    template is modified in place.
    """
    if isinstance(param, list):
        source_name, dest_name = param
    else:
        source_name = dest_name = param

    if source_name == "page title":
        foreign_text = pagetitle
    else:
        foreign_text = getparam(template, source_name)
    latin_text = getparam(template, paramtr)
    if not foreign_text:
        return False

    new_foreign, new_latin, actions = do_canon_param(
        pagetitle, index, template, lang, source_name, dest_name, paramtr,
        foreign_text, latin_text, translit_module)

    # Snapshot the template before touching it, for the log message below.
    snapshot = "%s" % unicode(template)
    if new_foreign:
        add_param_handling_head(template, dest_name, new_foreign)
    # A result equal to True means "drop the transliteration parameter";
    # any other truthy result is the replacement transliteration.
    if new_latin == True:
        template.remove(paramtr)
    elif new_latin:
        addparam(template, paramtr, new_latin)

    if new_foreign or new_latin:
        msg("Page %s %s: Replaced %s with %s" % (
            index, pagetitle, snapshot, unicode(template)))
    return actions
def canonicalize_one_page_verb_form(page, index, text):
    """Canonicalize the verb-form argument of matching templates on a page.

    Relies on the enclosing-scope names ``tempname`` (the template to look
    for) and ``formarg`` (the name of its verb-form parameter). Templates are
    modified in place.

    Returns a ``(text, changelog)`` tuple; the changelog lists one entry per
    template whose wikitext actually changed.
    """
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    recorded = []

    for template in text.filter_templates():
        is_target = template.name == tempname
        if not is_target:
            continue
        before = unicode(template)
        form = getparam(template, formarg)
        if form:
            addparam(template, formarg, canonicalize_form(form))
        after = unicode(template)
        if before == after:
            continue
        msg("Replacing %s with %s" % (before, after))
        if re.match("^[1I](-|$)", form):
            # Form I: also report the two parameters following the form arg.
            first_extra = getparam(template, str(1 + int(formarg)))
            second_extra = getparam(template, str(2 + int(formarg)))
            recorded.append(
                "form=%s (%s/%s)" % (form, first_extra, second_extra))
        else:
            recorded.append("form=%s" % form)

    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
        tempname, formarg, '; '.join(recorded))
    if recorded:
        msg("Change log = %s" % changelog)
    return text, changelog
def rewrite_one_page_verb_headword(page, index, text):
    """Turn form= of ar-verb templates into a leading positional 1=.

    Existing numbered parameters 1..10 are shifted up by one, the form value
    is canonicalized with ``canonicalize_form`` and moved to the front of the
    template as an unnamed first parameter. Templates are modified in place.

    Returns a ``(text, changelog)`` tuple.
    """
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    recorded = []

    for template in text.filter_templates():
        if template.name not in ["ar-verb"]:
            continue
        before = unicode(template)
        form = getparam(template, "form")
        if not form:
            continue

        # Renumber from the highest parameter down so that no rename lands
        # on a number that is still in use; this keeps the parameter order.
        for number in xrange(10, 0, -1):
            key = str(number)
            if template.has(key):
                template.get(key).name = str(number + 1)

        # Re-add form= as the very first parameter ...
        template.remove("form")
        first_name = template.params[0].name if template.params else None
        addparam(template, "form", canonicalize_form(form),
                 before=first_name)
        # ... then rename it to 1= and hide the key so it is positional.
        template.get("form").name = "1"
        template.get("1").showkey = False

        after = unicode(template)
        if before != after:
            msg("Replacing %s with %s" % (before, after))
        if re.match("^[1I](-|$)", form):
            recorded.append("form=%s (%s/%s)" % (
                form, getparam(template, "2"), getparam(template, "3")))
        else:
            recorded.append("form=%s" % form)

    changelog = "ar-verb: form= -> 1= and canonicalize to Roman numerals, move other params up: %s" % '; '.join(recorded)
    if recorded:
        msg("Change log = %s" % changelog)
    return text, changelog
def canon_param(pagetitle, index, template, param, paramtr, translit_module,
                include_tempname_in_changelog=False):
    """Canonicalize one foreign-script parameter and its transliteration.

    ``param`` is either a single parameter name, used both for reading and
    writing, or a two-element list ``[fromparam, toparam]``. The special
    source name "page title" reads the foreign text from ``pagetitle``
    instead of from the template.

    Returns False when there is no foreign text to work on; otherwise
    returns the ``actions`` value produced by ``do_canon_param``. The
    template is modified in place.
    """
    if isinstance(param, list):
        source_name, dest_name = param
    else:
        source_name = dest_name = param

    if source_name == "page title":
        foreign_text = pagetitle
    else:
        foreign_text = getparam(template, source_name)
    latin_text = getparam(template, paramtr)
    if not foreign_text:
        return False

    new_foreign, new_latin, actions = do_canon_param(
        pagetitle, index, template, source_name, dest_name, paramtr,
        foreign_text, latin_text, translit_module,
        include_tempname_in_changelog)

    # Snapshot the template before touching it, for the log message below.
    snapshot = "%s" % unicode(template)
    if new_foreign:
        addparam(template, dest_name, new_foreign)
    # A result equal to True means "drop the transliteration parameter";
    # any other truthy result is the replacement transliteration.
    if new_latin == True:
        template.remove(paramtr)
    elif new_latin:
        addparam(template, paramtr, new_latin)

    if new_foreign or new_latin:
        msg("Page %s %s: Replaced %s with %s" % (
            index, pagetitle, snapshot, unicode(template)))
    return actions
def canonicalize_one_page_verb_form(page, index, text):
    """Canonicalize the verb-form argument of matching templates on a page.

    Relies on the enclosing-scope names ``tempname`` (the template to look
    for) and ``formarg`` (the name of its verb-form parameter). Templates are
    modified in place.

    Returns a ``(text, changelog)`` tuple; the changelog lists one entry per
    template whose wikitext actually changed.
    """
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    recorded = []

    for template in text.filter_templates():
        is_target = template.name == tempname
        if not is_target:
            continue
        before = unicode(template)
        form = getparam(template, formarg)
        if form:
            addparam(template, formarg, canonicalize_form(form))
        after = unicode(template)
        if before == after:
            continue
        msg("Replacing %s with %s" % (before, after))
        if re.match("^[1I](-|$)", form):
            # Form I: also report the two parameters following the form arg.
            first_extra = getparam(template, str(1 + int(formarg)))
            second_extra = getparam(template, str(2 + int(formarg)))
            recorded.append(
                "form=%s (%s/%s)" % (form, first_extra, second_extra))
        else:
            recorded.append("form=%s" % form)

    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
        tempname, formarg, '; '.join(recorded))
    if recorded:
        msg("Change log = %s" % changelog)
    return text, changelog
def process_template(pagetitle, index, template, ruparam, trparam, output_line, find_accents, verbose):
    """Look up and apply an accented form for one template parameter.

    Args:
        pagetitle: Title of the page being processed; also the value used
            when ``ruparam`` is "page title".
        index: Page index, used only in log messages.
        template: Template object, modified in place.
        ruparam: Name of the parameter to read, or a two-element list
            ``[read_param, save_param]`` when the result should be stored
            under a different name.
        trparam: Name of the transliteration parameter, or a false value if
            the template has none.
        output_line: Callable receiving short status strings.
        find_accents: When false, no lookup is attempted and only the
            "Need accents" status may be emitted.
        verbose: Passed through to ``find_accented``.

    Returns:
        False when nothing was changed (including for {{head}}), otherwise a
        one-element list holding the changelog entry.
    """
    origt = unicode(template)
    saveparam = ruparam

    def pagemsg(text):
        msg("Page %s %s: %s" % (index, pagetitle, text))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, semi_verbose)

    if semi_verbose:
        pagemsg("Processing template: %s" % unicode(template))
    if unicode(template.name) == "head":
        # Skip {{head}}. We don't want to mess with headwords.
        return False
    if isinstance(ruparam, list):
        ruparam, saveparam = ruparam
    if ruparam == "page title":
        val = pagetitle
    else:
        val = getparam(template, ruparam)
    valtr = getparam(template, trparam) if trparam else ""
    changed = False
    if find_accents:
        newval, newtr = find_accented(val, valtr, verbose, pagemsg, expand_text, origt)
        if newval != val or newtr != valtr:
            # Only accept results that differ from the original in accents.
            if ru.remove_accents(newval) != ru.remove_accents(val):
                pagemsg("WARNING: Accented page %s changed from %s in more than just accents, not changing" % (newval, val))
            else:
                changed = True
                addparam(template, saveparam, newval)
                if newtr:
                    if not trparam:
                        pagemsg("WARNING: Unable to change translit to %s because no translit param available (Cyrillic param %s): %s" % (newtr, saveparam, origt))
                    elif unicode(template.name) in ["ru-ux"]:
                        pagemsg("WARNING: Not changing or adding translit param %s=%s to ru-ux: origt=%s" % (
                            trparam, newtr, origt))
                    else:
                        if valtr and valtr != newtr:
                            pagemsg("WARNING: Changed translit param %s from %s to %s: origt=%s" % (trparam, valtr, newtr, origt))
                        if not valtr:
                            pagemsg("NOTE: Added translit param %s=%s to template: origt=%s" % (trparam, newtr, origt))
                        addparam(template, trparam, newtr)
                elif valtr:
                    pagemsg("WARNING: Template has translit %s but lookup result has none, leaving translit alone: origt=%s" % (valtr, origt))
                if check_need_accent(newval):
                    output_line("Need accents (changed)")
                else:
                    output_line("Found accents")
    if not changed and check_need_accent(val):
        output_line("Need accents")
    if changed:
        pagemsg("Replaced %s with %s" % (origt, unicode(template)))
    # newval/newtr are only bound when find_accents is true; the conditional
    # expression tests `changed` first, so they are never read otherwise.
    return ["auto-accent %s%s" % (newval, "//%s" % newtr if newtr else "")] if changed else False
def rewrite_one_page_ar_nisba(page, index, text):
    """Move head= to a leading 1= in every ar-nisba template on the page.

    The move only happens when the template has head= and no 1= yet. Any
    ar-nisba template carrying plhead= is reported through ``blib.msg``.
    Templates are modified in place.

    Returns a ``(text, changelog)`` tuple.
    """
    for template in text.filter_templates():
        is_nisba = template.name == "ar-nisba"
        if not is_nisba:
            continue
        if template.has("head") and not template.has(1):
            head_value = unicode(template.get("head").value)
            template.remove("head")
            # Insert in front of whatever is now the first parameter, if any.
            if len(template.params) > 0:
                anchor = template.params[0].name
            else:
                anchor = None
            addparam(template, "1", head_value, before=anchor)
        if template.has("plhead"):
            blib.msg("%s has plhead=" % page.title())
    return text, "ar-nisba: head= -> 1="
def add_param_handling_head(template, param, value):
    """Add ``param=value`` to ``template``, placing head= specially.

    Any parameter other than "head" is simply handed to ``addparam``. For
    "head", the parameter is inserted before the first existing parameter
    whose (stripped) name does not start with a digit; if there is no such
    parameter, ``before=None`` is passed.
    """
    if param == "head":
        stripped_names = (unicode(existing.name).strip()
                          for existing in template.params)
        anchor = next(
            (name for name in stripped_names
             if not re.match("^[0-9]+", name)),
            None)
        addparam(template, param, value, before=anchor)
    else:
        addparam(template, param, value)
def fix(page, index, text):
    """Make head= the unnamed first parameter of Arabic headword templates.

    Applies to templates named in ``arabiclib.arabic_all_headword_templates``
    that have head= and none of the numbered parameters 1..8. The head value
    is re-added in front of the other parameters and, if it ends up first,
    its key is hidden. Templates are modified in place.

    Returns a ``(text, changelog)`` tuple.
    """
    for template in text.filter_templates():
        if template.name not in arabiclib.arabic_all_headword_templates:
            continue
        if not template.has("head"):
            continue
        if any(template.has(number) for number in range(1, 9)):
            continue
        head_value = unicode(template.get("head").value)
        template.remove("head")
        anchor = template.params[0].name if template.params else None
        addparam(template, "head", head_value, before=anchor)
        if template.params[0].name == "head":
            # Hiding the key turns head=... into a positional parameter.
            template.get("head").showkey = False
    return text, "ar headword: head= > 1="
def vocalize_param(pagetitle, index, template, param, paramtr):
    """Try to vocalize ``param`` of ``template`` using ``paramtr``.

    Returns:
        False if ``param`` is missing or empty; the vocalized string if
        ``do_vocalize_param`` produced one (the template is then updated in
        place and the change logged); True in every other case.
    """
    arabic = getparam(template, param)
    latin = getparam(template, paramtr)
    if not arabic:
        return False
    if not latin:
        return True

    vocalized = do_vocalize_param(pagetitle, index, template, param,
                                  arabic, latin)
    if not vocalized:
        return True

    snapshot = "%s" % unicode(template)
    addparam(template, param, vocalized)
    msg("Page %s %s: Replaced %s with %s" % (
        index, pagetitle, snapshot, unicode(template)))
    return vocalized
def fix_one_page_tool_place_noun(page, index, text):
    """Replace cap= by its inverse lc= on the templates named ``template``.

    ``template`` is taken from the enclosing scope. For each matching
    template: if cap= has a non-empty value it is removed, otherwise lc=1 is
    added. Templates are modified in place.

    Returns a ``(text, changelog)`` tuple.
    """
    pagetitle = page.title()
    prefix = "Page %s %s: " % (index, pagetitle)
    for t in text.filter_templates():
        matches = t.name == template
        if not matches:
            continue
        if getparam(t, "cap"):
            msg(prefix + "Template %s: Remove cap=" % template)
            t.remove("cap")
        else:
            msg(prefix + "Template %s: Add lc=1" % template)
            addparam(t, "lc", "1")
    changelog = "%s: If cap= is present, remove it, else add lc=" % template
    msg(prefix + "Change log = %s" % changelog)
    return text, changelog
def fix(page, index, text):
    """Make head= the unnamed first parameter of Arabic headword templates.

    Applies to templates named in ``arabiclib.arabic_all_headword_templates``
    that have head= and none of the numbered parameters 1..8. The head value
    is re-added in front of the other parameters and, if it ends up first,
    its key is hidden. Templates are modified in place.

    Returns a ``(text, changelog)`` tuple.
    """
    for template in text.filter_templates():
        if template.name not in arabiclib.arabic_all_headword_templates:
            continue
        if not template.has("head"):
            continue
        if any(template.has(number) for number in range(1, 9)):
            continue
        head_value = unicode(template.get("head").value)
        template.remove("head")
        anchor = template.params[0].name if template.params else None
        addparam(template, "head", head_value, before=anchor)
        if template.params[0].name == "head":
            # Hiding the key turns head=... into a positional parameter.
            template.get("head").showkey = False
    return text, "ar headword: head= > 1="
def rewrite_one_page_verb_headword(page, index, text):
    """Turn form= of ar-verb templates into a leading positional 1=.

    Existing numbered parameters 1..10 are shifted up by one, the form value
    is canonicalized with ``canonicalize_form`` and moved to the front of the
    template as an unnamed first parameter. Templates are modified in place.

    Returns a ``(text, changelog)`` tuple.
    """
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    recorded = []

    for template in text.filter_templates():
        if template.name not in ["ar-verb"]:
            continue
        before = unicode(template)
        form = getparam(template, "form")
        if not form:
            continue

        # Renumber from the highest parameter down so that no rename lands
        # on a number that is still in use; this keeps the parameter order.
        for number in xrange(10, 0, -1):
            key = str(number)
            if template.has(key):
                template.get(key).name = str(number + 1)

        # Re-add form= as the very first parameter ...
        template.remove("form")
        first_name = template.params[0].name if template.params else None
        addparam(template, "form", canonicalize_form(form),
                 before=first_name)
        # ... then rename it to 1= and hide the key so it is positional.
        template.get("form").name = "1"
        template.get("1").showkey = False

        after = unicode(template)
        if before != after:
            msg("Replacing %s with %s" % (before, after))
        if re.match("^[1I](-|$)", form):
            recorded.append("form=%s (%s/%s)" % (
                form, getparam(template, "2"), getparam(template, "3")))
        else:
            recorded.append("form=%s" % form)

    changelog = "ar-verb: form= -> 1= and canonicalize to Roman numerals, move other params up: %s" % '; '.join(recorded)
    if recorded:
        msg("Change log = %s" % changelog)
    return text, changelog
def fix_one_page_smp(page, index, text):
    """Change pl=smp to pl=sp in ar-decl-* templates on a page.

    Walks pl=, pl2=, pl3=, ... of each template whose name starts with
    "ar-decl-", stopping at the first empty one. A value of "smp" is
    replaced with "sp" unless the (shadda-reordered) 1= value ends with
    ``TAM``, in which case only a warning is logged. Templates are modified
    in place.

    Returns a ``(text, changelog)`` tuple.
    """
    pagetitle = page.title()
    for t in text.filter_templates():
        head = reorder_shadda(getparam(t, "1"))
        if not t.name.startswith("ar-decl-"):
            continue
        plural_param = "pl"
        next_number = 2
        plural_value = getparam(t, plural_param)
        while plural_value:
            if plural_value == "smp":
                if head.endswith(TAM):
                    msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (
                        index, pagetitle, plural_param, head, t.name))
                else:
                    msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (
                        index, pagetitle, plural_param, plural_param,
                        t.name))
                    addparam(t, plural_param, "sp")
            # Advance to the next numbered plural parameter.
            plural_param = "pl%s" % next_number
            next_number += 1
            plural_value = getparam(t, plural_param)
    changelog = "Change pl=smp to pl=sp"
    msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
    return text, changelog
def fix_one_page_smp(page, index, text):
    """Change pl=smp to pl=sp in ar-decl-* templates on a page.

    Walks pl=, pl2=, pl3=, ... of each template whose name starts with
    "ar-decl-", stopping at the first empty one. A value of "smp" is
    replaced with "sp" unless the (shadda-reordered) 1= value ends with
    ``TAM``, in which case only a warning is logged. Templates are modified
    in place.

    Returns a ``(text, changelog)`` tuple.
    """
    pagetitle = page.title()
    for t in text.filter_templates():
        head = reorder_shadda(getparam(t, "1"))
        if not t.name.startswith("ar-decl-"):
            continue
        plural_param = "pl"
        next_number = 2
        plural_value = getparam(t, plural_param)
        while plural_value:
            if plural_value == "smp":
                if head.endswith(TAM):
                    msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (
                        index, pagetitle, plural_param, head, t.name))
                else:
                    msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (
                        index, pagetitle, plural_param, plural_param,
                        t.name))
                    addparam(t, plural_param, "sp")
            # Advance to the next numbered plural parameter.
            plural_param = "pl%s" % next_number
            next_number += 1
            plural_value = getparam(t, plural_param)
    changelog = "Change pl=smp to pl=sp"
    msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
    return text, changelog
def rewrite_one_page_ru_decl_noun(page, index, text):
    """Convert old Russian noun declension templates to ru-noun-table.

    Three template names are handled explicitly; all others are looked up
    in ``ru_noun_transl``, whose entries are either
    ``(regex, declclass, directive)`` (the accent class is then the regex's
    single capture group) or ``(regex, declclass, directive, accentclass)``.
    The directive says which positional parameters hold the stem and the
    bare form. Parameters are read with ``getrmparam`` (presumably
    fetch-and-remove -- confirm against its definition), so anything still
    left on the template afterwards is treated as unexpected.

    Args:
        page: Page object; only ``page.title()`` is used.
        index: Page index, used only in log messages.
        text: Parsed page text exposing ``filter_templates()``; its templates
            are modified in place.

    Returns:
        ``(None, "")`` if any template could not be converted safely (the
        whole page is then abandoned), otherwise ``(text, comment)`` where
        ``comment`` is None when no template was converted.
    """
    oldtemps = []
    pagename = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    nochange = False
    for t in text.filter_templates():
        # The template is only renamed at the very end of the iteration, so
        # the name captured here is valid for all checks below.
        origname = unicode(t.name).strip()
        origtemplate = unicode(t)
        if origname in ["ru-noun-table", "ru-noun-old"]:
            continue
        # Per-template state. `change` must be reset here: a stale True from
        # an earlier template would send unrelated templates into the
        # rewrite step below and abort the page with "Can't locate stem".
        change = False
        stem = ""
        bare = ""
        accsg = ""
        locsg = ""
        if origname == u"ru-noun-ин":
            ustem = getrmparam(t, "1")
            stem = getrmparam(t, "2")
            full = getrmparam(t, "3")
            stem = stem or ustem
            declclass = u"ин"
            if stem + u"ин" == full:
                accentclass = "1"
            elif remove_diacritics(stem) + u"и́н" == full:
                accentclass = "4"
            elif stem == full:
                accentclass = "1"
                declclass = u"-е"
            else:
                pagemsg("WARNING: Can't locate accent class for template: %s" % origtemplate)
                nochange = True
                break
            change = True
        elif origname == u"ru-noun-нок":
            ustem = getrmparam(t, "1")
            stem = getrmparam(t, "2")
            uplural = getrmparam(t, "3")
            plural = getrmparam(t, "4")
            stem = stem or ustem
            plural = plural or uplural
            accentclass = "2"
            if stem.endswith(u"ё"):
                declclass = u"ёнок"
                stem = re.sub(u"ё$", "", stem)
            elif stem.endswith(u"о́"):
                declclass = u"онок"
                stem = re.sub(u"о́$", "", stem)
            else:
                pagemsg("WARNING: Template stem ends weirdly: %s" % origtemplate)
                nochange = True
                break
            if stem != re.sub(u"(я́|а́)$", "", plural):
                pagemsg("WARNING: Strange plural: %s" % origtemplate)
                nochange = True
                break
            if (declclass == u"ёнок" and not plural.endswith(u"я́") or
                    declclass == u"онок" and not plural.endswith(u"а́")):
                pagemsg("WARNING: Unexpected plural ending for stem: %s" % origtemplate)
                nochange = True
                break
            change = True
        elif origname == u"ru-noun-vel-3":
            ustem = getrmparam(t, "1")
            stem = getrmparam(t, "2")
            bare = getrmparam(t, "3")
            locsg = getrmparam(t, "13")
            locpl = getrmparam(t, "14")
            stem = stem or ustem or bare or pagename
            declclass = ""
            accentclass = "3"
            if locpl and locpl != remove_diacritics(stem) + u"а́х":
                pagemsg("WARNING: Unexpected locative plural %s: %s" % (locpl, origtemplate))
                nochange = True
                break
            change = True
        else:
            for entry in ru_noun_transl:
                if len(entry) == 3:
                    regex, declclass, directive = entry
                    m = re.match(regex, origname)
                    if not m:
                        continue
                    assert len(m.groups()) == 1
                    accentclass = m.group(1)
                else:
                    assert len(entry) == 4
                    regex, declclass, directive, accentclass = entry
                    m = re.match(regex, origname)
                    if not m:
                        continue
                    assert len(m.groups()) == 0
                if directive == "stem":
                    stem = getrmparam(t, "1")
                elif directive == "stem-bare":
                    stem = getrmparam(t, "1")
                    bare = getrmparam(t, "2")
                elif directive == "u-stem":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    stem = stem or ustem
                elif directive == "u-stem-bare":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    bare = getrmparam(t, "3")
                    stem = stem or ustem or bare
                elif directive == "u-stem-pagename":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    stem = stem or ustem or pagename
                elif directive == "u-stem-bare-pagename":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    bare = getrmparam(t, "3")
                    stem = stem or ustem or bare or pagename
                elif directive == "u-stem-u-bare":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    ubare = getrmparam(t, "3")
                    bare = getrmparam(t, "4")
                    stem = stem or ustem
                    bare = bare or ubare
                elif directive in ["u-stem-u-bare-irregpl",
                                   "u-stem-u-bare-irregpl-irregaccsg"]:
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    ubare = getrmparam(t, "3")
                    bare = getrmparam(t, "4")
                    irregpl = getrmparam(t, "5")
                    stem = irregpl or stem or ustem
                    bare = bare or ubare
                    if directive == "u-stem-u-bare-irregpl-irregaccsg":
                        accsg = getrmparam(t, "6")
                elif directive in ["u-stem-minus-i", "u-stem-u-bare-minus-i"]:
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    stem = stem or ustem
                    unstressedi = u"и"
                    stressedi = u"и́"
                    # The stressed vowel is letter + combining accent, hence
                    # two code points to strip.
                    assert len(stressedi) == 2
                    if stem.endswith(unstressedi):
                        stem = stem[0:-1]
                    elif stem.endswith(stressedi):
                        stem = stem[0:-2]
                    else:
                        pagemsg(u"WARNING: Stem %s doesn't end in и in %s, skipping" % (stem, unicode(t)))
                        nochange = True
                        break
                else:
                    pagemsg("WARNING: Unknown directive %s, skipping" % directive)
                    nochange = True
                    break
                change = True
                break
            else:
                if re.match("^ru-noun-", origname):
                    pagemsg("Encountered unknown noun decl template %s" % unicode(t))
            if nochange:
                # The breaks above only leave the lookup loop; abandon the
                # page here, as the explicit branches do.
                break
        if change:
            if not stem:
                pagemsg("WARNING: Can't locate stem in %s, skipping" % origtemplate)
                nochange = True
                break
            anim = getrmparam(t, "anim")
            if anim:
                anim = "an"
            n = getrmparam(t, "n")
            notes = getrmparam(t, "note")
            if len(t.params) > 0:
                pagemsg("WARNING: Extraneous parameters in %s, skipping" % unicode(t))
                nochange = True
                break
            addparam(t, "1", accentclass)
            addparam(t, "2", stem)
            addparam(t, "3", declclass)
            if bare:
                addparam(t, "4", bare)
            # Use the values collected above (accsg/locsg); the former names
            # acc_sg/loc_sg were never assigned and raised NameError.
            if accsg:
                addparam(t, "acc_sg", accsg)
            if locsg:
                addparam(t, "loc", locsg)
            if anim:
                addparam(t, "a", anim)
            if n:
                addparam(t, "n", n)
            if notes:
                addparam(t, "notes", notes)
            t.name = "ru-noun-table"
            pagemsg("Rewrote %s as %s" % (origtemplate, unicode(t)))
            oldtemps.append(origname)

    if nochange:
        return None, ""
    if oldtemps:
        comment = "convert %s -> ru-noun-table" % ", ".join(oldtemps)
    else:
        comment = None
    return text, comment
def rewrite_one_page_ru_decl_adj(page, index, text):
    """Convert old Russian adjective declension templates to ru-decl-adj.

    ru-adj-table is simply renamed. Other templates are first normalized to
    a ru-adjN name; if that name is a key of ``ending_for_ru_adj`` the
    parameters are rearranged (stem in 1=, ending in 2=, short forms in
    3= to 6=) and the template is renamed. Templates are modified in place.

    Returns a ``(text, comment)`` tuple; ``comment`` is None when no
    template was converted.
    """
    converted_names = []
    pagename = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def current_name(tmpl):
        return unicode(tmpl.name).strip()

    def drop_params_8_to_3(tmpl):
        for number in ("8", "7", "6", "5", "4", "3"):
            rmparam(tmpl, number)

    for t in text.filter_templates():
        origname = current_name(t)
        origtemplate = unicode(t)

        if origname == "ru-adj-table":
            t.name = "ru-decl-adj"
            converted_names.append(origname)
            continue

        # Normalize the various legacy names to ru-adjN, re-reading the name
        # after every rename.
        if re.match("^ru-adjective[0-9]", origname):
            t.name = origname.replace("ru-adjective", "ru-adj")
        if current_name(t) == "ru-passive participle decl":
            t.name = "ru-adj1"
        suffix = None
        name = current_name(t)
        if name == "ru-adj3-sja":
            suffix = u"ся"
            t.name = "ru-adj3"
        elif name == "ru-adj5-suffix":
            suffix = "-" + getparam(t, "8")
            t.name = "ru-adj5"

        name = current_name(t)
        if name not in ending_for_ru_adj:
            # Not a template this function knows how to convert.
            continue
        ending = ending_for_ru_adj[name]

        if name == "ru-adj13":
            addparam(t, "2", ending)
            drop_params_8_to_3(t)
        elif name in ["ru-adj7", "ru-adj8", "ru-adj9", "ru-adj12"]:
            addparam(t, "1", getparam(t, "2").strip())
            addparam(t, "2", ending)
            drop_params_8_to_3(t)
        else:
            addparam(t, "1", getparam(t, "2").strip())
            addparam(t, "2", ending)
            mshort = clean(getparam(t, "3"))
            # The masculine check warns when the form DOES end in one of
            # these vowels; the other three warn when it does not.
            if mshort and re.search(u"[аяоеыи]$", remove_diacritics(mshort)):
                pagemsg("WARNING: short masculine %s doesn't have right ending" % mshort)
            fshort = clean(getparam(t, "4"))
            if fshort and not re.search(u"[ая]$", remove_diacritics(fshort)):
                pagemsg("WARNING: short feminine %s doesn't have right ending" % fshort)
            nshort = clean(getparam(t, "5"))
            if nshort and not re.search(u"[ое]$", remove_diacritics(nshort)):
                pagemsg("WARNING: short neuter %s doesn't have right ending" % nshort)
            pshort = clean(getparam(t, "6"))
            if pshort and not re.search(u"[ыи]$", remove_diacritics(pshort)):
                pagemsg("WARNING: short plural %s doesn't have right ending" % pshort)
            drop_params_8_to_3(t)
            if mshort:
                addparam(t, "3", mshort)
            # Note that fshort and nshort get reversed
            if nshort:
                addparam(t, "4", nshort)
            if fshort:
                addparam(t, "5", fshort)
            if pshort:
                addparam(t, "6", pshort)

        if suffix:
            addparam(t, "suffix", suffix)
        t.name = "ru-decl-adj"
        pagemsg("Rewrote %s as %s" % (origtemplate, unicode(t)))
        converted_names.append(origname)

    if converted_names:
        comment = "convert %s -> ru-decl-adj" % ", ".join(converted_names)
    else:
        comment = None
    return text, comment
def rewrite_one_page_ru_decl_noun(page, index, text):
    """Convert old Russian noun declension templates to ru-noun-table.

    Three template names are handled explicitly; all others are looked up
    in ``ru_noun_transl``, whose entries are either
    ``(regex, declclass, directive)`` (the accent class is then the regex's
    single capture group) or ``(regex, declclass, directive, accentclass)``.
    The directive says which positional parameters hold the stem and the
    bare form. Parameters are read with ``getrmparam`` (presumably
    fetch-and-remove -- confirm against its definition), so anything still
    left on the template afterwards is treated as unexpected.

    Args:
        page: Page object; only ``page.title()`` is used.
        index: Page index, used only in log messages.
        text: Parsed page text exposing ``filter_templates()``; its templates
            are modified in place.

    Returns:
        ``(None, "")`` if any template could not be converted safely (the
        whole page is then abandoned), otherwise ``(text, comment)`` where
        ``comment`` is None when no template was converted.
    """
    oldtemps = []
    pagename = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    nochange = False
    for t in text.filter_templates():
        # The template is only renamed at the very end of the iteration, so
        # the name captured here is valid for all checks below.
        origname = unicode(t.name).strip()
        origtemplate = unicode(t)
        if origname in ["ru-noun-table", "ru-noun-old"]:
            continue
        # Per-template state. `change` must be reset here: a stale True from
        # an earlier template would send unrelated templates into the
        # rewrite step below and abort the page with "Can't locate stem".
        change = False
        stem = ""
        bare = ""
        accsg = ""
        locsg = ""
        if origname == u"ru-noun-ин":
            ustem = getrmparam(t, "1")
            stem = getrmparam(t, "2")
            full = getrmparam(t, "3")
            stem = stem or ustem
            declclass = u"ин"
            if stem + u"ин" == full:
                accentclass = "1"
            elif remove_diacritics(stem) + u"и́н" == full:
                accentclass = "4"
            elif stem == full:
                accentclass = "1"
                declclass = u"-е"
            else:
                pagemsg("WARNING: Can't locate accent class for template: %s" % origtemplate)
                nochange = True
                break
            change = True
        elif origname == u"ru-noun-нок":
            ustem = getrmparam(t, "1")
            stem = getrmparam(t, "2")
            uplural = getrmparam(t, "3")
            plural = getrmparam(t, "4")
            stem = stem or ustem
            plural = plural or uplural
            accentclass = "2"
            if stem.endswith(u"ё"):
                declclass = u"ёнок"
                stem = re.sub(u"ё$", "", stem)
            elif stem.endswith(u"о́"):
                declclass = u"онок"
                stem = re.sub(u"о́$", "", stem)
            else:
                pagemsg("WARNING: Template stem ends weirdly: %s" % origtemplate)
                nochange = True
                break
            if stem != re.sub(u"(я́|а́)$", "", plural):
                pagemsg("WARNING: Strange plural: %s" % origtemplate)
                nochange = True
                break
            if (declclass == u"ёнок" and not plural.endswith(u"я́") or
                    declclass == u"онок" and not plural.endswith(u"а́")):
                pagemsg("WARNING: Unexpected plural ending for stem: %s" % origtemplate)
                nochange = True
                break
            change = True
        elif origname == u"ru-noun-vel-3":
            ustem = getrmparam(t, "1")
            stem = getrmparam(t, "2")
            bare = getrmparam(t, "3")
            locsg = getrmparam(t, "13")
            locpl = getrmparam(t, "14")
            stem = stem or ustem or bare or pagename
            declclass = ""
            accentclass = "3"
            if locpl and locpl != remove_diacritics(stem) + u"а́х":
                pagemsg("WARNING: Unexpected locative plural %s: %s" % (locpl, origtemplate))
                nochange = True
                break
            change = True
        else:
            for entry in ru_noun_transl:
                if len(entry) == 3:
                    regex, declclass, directive = entry
                    m = re.match(regex, origname)
                    if not m:
                        continue
                    assert len(m.groups()) == 1
                    accentclass = m.group(1)
                else:
                    assert len(entry) == 4
                    regex, declclass, directive, accentclass = entry
                    m = re.match(regex, origname)
                    if not m:
                        continue
                    assert len(m.groups()) == 0
                if directive == "stem":
                    stem = getrmparam(t, "1")
                elif directive == "stem-bare":
                    stem = getrmparam(t, "1")
                    bare = getrmparam(t, "2")
                elif directive == "u-stem":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    stem = stem or ustem
                elif directive == "u-stem-bare":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    bare = getrmparam(t, "3")
                    stem = stem or ustem or bare
                elif directive == "u-stem-pagename":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    stem = stem or ustem or pagename
                elif directive == "u-stem-bare-pagename":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    bare = getrmparam(t, "3")
                    stem = stem or ustem or bare or pagename
                elif directive == "u-stem-u-bare":
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    ubare = getrmparam(t, "3")
                    bare = getrmparam(t, "4")
                    stem = stem or ustem
                    bare = bare or ubare
                elif directive in ["u-stem-u-bare-irregpl",
                                   "u-stem-u-bare-irregpl-irregaccsg"]:
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    ubare = getrmparam(t, "3")
                    bare = getrmparam(t, "4")
                    irregpl = getrmparam(t, "5")
                    stem = irregpl or stem or ustem
                    bare = bare or ubare
                    if directive == "u-stem-u-bare-irregpl-irregaccsg":
                        accsg = getrmparam(t, "6")
                elif directive in ["u-stem-minus-i", "u-stem-u-bare-minus-i"]:
                    ustem = getrmparam(t, "1")
                    stem = getrmparam(t, "2")
                    stem = stem or ustem
                    unstressedi = u"и"
                    stressedi = u"и́"
                    # The stressed vowel is letter + combining accent, hence
                    # two code points to strip.
                    assert len(stressedi) == 2
                    if stem.endswith(unstressedi):
                        stem = stem[0:-1]
                    elif stem.endswith(stressedi):
                        stem = stem[0:-2]
                    else:
                        pagemsg(u"WARNING: Stem %s doesn't end in и in %s, skipping" % (stem, unicode(t)))
                        nochange = True
                        break
                else:
                    pagemsg("WARNING: Unknown directive %s, skipping" % directive)
                    nochange = True
                    break
                change = True
                break
            else:
                if re.match("^ru-noun-", origname):
                    pagemsg("Encountered unknown noun decl template %s" % unicode(t))
            if nochange:
                # The breaks above only leave the lookup loop; abandon the
                # page here, as the explicit branches do.
                break
        if change:
            if not stem:
                pagemsg("WARNING: Can't locate stem in %s, skipping" % origtemplate)
                nochange = True
                break
            anim = getrmparam(t, "anim")
            if anim:
                anim = "an"
            n = getrmparam(t, "n")
            notes = getrmparam(t, "note")
            if len(t.params) > 0:
                pagemsg("WARNING: Extraneous parameters in %s, skipping" % unicode(t))
                nochange = True
                break
            addparam(t, "1", accentclass)
            addparam(t, "2", stem)
            addparam(t, "3", declclass)
            if bare:
                addparam(t, "4", bare)
            # Use the values collected above (accsg/locsg); the former names
            # acc_sg/loc_sg were never assigned and raised NameError.
            if accsg:
                addparam(t, "acc_sg", accsg)
            if locsg:
                addparam(t, "loc", locsg)
            if anim:
                addparam(t, "a", anim)
            if n:
                addparam(t, "n", n)
            if notes:
                addparam(t, "notes", notes)
            t.name = "ru-noun-table"
            pagemsg("Rewrote %s as %s" % (origtemplate, unicode(t)))
            oldtemps.append(origname)

    if nochange:
        return None, ""
    if oldtemps:
        comment = "convert %s -> ru-noun-table" % ", ".join(oldtemps)
    else:
        comment = None
    return text, comment
def rewrite_one_page_ru_decl_adj(page, index, text):
    """Convert old Russian adjective declension templates to ru-decl-adj.

    ru-adj-table is simply renamed. Other templates are first normalized to
    a ru-adjN name; if that name is a key of ``ending_for_ru_adj`` the
    parameters are rearranged (stem in 1=, ending in 2=, short forms in
    3= to 6=) and the template is renamed. Templates are modified in place.

    Returns a ``(text, comment)`` tuple; ``comment`` is None when no
    template was converted.
    """
    converted_names = []
    pagename = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def current_name(tmpl):
        return unicode(tmpl.name).strip()

    def drop_params_8_to_3(tmpl):
        for number in ("8", "7", "6", "5", "4", "3"):
            rmparam(tmpl, number)

    for t in text.filter_templates():
        origname = current_name(t)
        origtemplate = unicode(t)

        if origname == "ru-adj-table":
            t.name = "ru-decl-adj"
            converted_names.append(origname)
            continue

        # Normalize the various legacy names to ru-adjN, re-reading the name
        # after every rename.
        if re.match("^ru-adjective[0-9]", origname):
            t.name = origname.replace("ru-adjective", "ru-adj")
        if current_name(t) == "ru-passive participle decl":
            t.name = "ru-adj1"
        suffix = None
        name = current_name(t)
        if name == "ru-adj3-sja":
            suffix = u"ся"
            t.name = "ru-adj3"
        elif name == "ru-adj5-suffix":
            suffix = "-" + getparam(t, "8")
            t.name = "ru-adj5"

        name = current_name(t)
        if name not in ending_for_ru_adj:
            # Not a template this function knows how to convert.
            continue
        ending = ending_for_ru_adj[name]

        if name == "ru-adj13":
            addparam(t, "2", ending)
            drop_params_8_to_3(t)
        elif name in ["ru-adj7", "ru-adj8", "ru-adj9", "ru-adj12"]:
            addparam(t, "1", getparam(t, "2").strip())
            addparam(t, "2", ending)
            drop_params_8_to_3(t)
        else:
            addparam(t, "1", getparam(t, "2").strip())
            addparam(t, "2", ending)
            mshort = clean(getparam(t, "3"))
            # The masculine check warns when the form DOES end in one of
            # these vowels; the other three warn when it does not.
            if mshort and re.search(u"[аяоеыи]$", remove_diacritics(mshort)):
                pagemsg("WARNING: short masculine %s doesn't have right ending" % mshort)
            fshort = clean(getparam(t, "4"))
            if fshort and not re.search(u"[ая]$", remove_diacritics(fshort)):
                pagemsg("WARNING: short feminine %s doesn't have right ending" % fshort)
            nshort = clean(getparam(t, "5"))
            if nshort and not re.search(u"[ое]$", remove_diacritics(nshort)):
                pagemsg("WARNING: short neuter %s doesn't have right ending" % nshort)
            pshort = clean(getparam(t, "6"))
            if pshort and not re.search(u"[ыи]$", remove_diacritics(pshort)):
                pagemsg("WARNING: short plural %s doesn't have right ending" % pshort)
            drop_params_8_to_3(t)
            if mshort:
                addparam(t, "3", mshort)
            # Note that fshort and nshort get reversed
            if nshort:
                addparam(t, "4", nshort)
            if fshort:
                addparam(t, "5", fshort)
            if pshort:
                addparam(t, "6", pshort)

        if suffix:
            addparam(t, "suffix", suffix)
        t.name = "ru-decl-adj"
        pagemsg("Rewrote %s as %s" % (origtemplate, unicode(t)))
        converted_names.append(origname)

    if converted_names:
        comment = "convert %s -> ru-decl-adj" % ", ".join(converted_names)
    else:
        comment = None
    return text, comment
def rewrite_one_page_arz_headword(page, index, text):
    """Rewrite arz-noun and arz-adj templates to the new parameter layout.

    All relevant parameters are read and removed first, then re-added under
    their new names in a fixed order. Templates are modified in place.

    Returns a ``(text, changelog)`` tuple; the changelog names every
    template that was rewritten.
    """
    def take(tmpl, name):
        # Read a parameter's value, then remove it from the template.
        value = getparam(tmpl, name)
        rmparam(tmpl, name)
        return value

    def add_if_set(tmpl, name, value):
        if value:
            addparam(tmpl, name, value)

    temps_changed = []
    for t in text.filter_templates():
        name = unicode(t.name)
        if name == "arz-noun":
            head = take(t, "head")
            tr = take(t, "tr")
            sort = take(t, "sort")
            g = take(t, "g")
            g2 = take(t, "g2")
            pl = take(t, "2")
            pltr = take(t, "3")

            # 1= and 2= are always written, even when empty.
            addparam(t, "1", head)
            addparam(t, "2", g)
            add_if_set(t, "g2", g2)
            add_if_set(t, "tr", tr)
            add_if_set(t, "pl", pl)
            add_if_set(t, "pltr", pltr)
            add_if_set(t, "sort", sort)
            temps_changed.append("arz-noun")
        elif name == "arz-adj":
            head = take(t, "head")
            tr = take(t, "tr")
            sort = take(t, "sort")
            # pwv=/fwv= take precedence over p=/f= when non-empty.
            pl = getparam(t, "pwv") or getparam(t, "p")
            rmparam(t, "pwv")
            rmparam(t, "p")
            pltr = take(t, "ptr")
            f = getparam(t, "fwv") or getparam(t, "f")
            rmparam(t, "fwv")
            rmparam(t, "f")
            ftr = take(t, "ftr")

            addparam(t, "1", head)
            add_if_set(t, "tr", tr)
            add_if_set(t, "f", f)
            add_if_set(t, "ftr", ftr)
            add_if_set(t, "pl", pl)
            add_if_set(t, "pltr", pltr)
            add_if_set(t, "sort", sort)
            temps_changed.append("arz-adj")
    return text, "rewrite %s to new style" % ", ".join(temps_changed)
def vocalize_head(pagetitle, index, template):
    """Vocalize the head(s) of a headword template from its transliteration(s).

    Steps, in order:

    1. If ``tr=`` holds several comma-separated transliterations, keep the
       first in ``tr=`` and move the rest into new ``headN=`` / ``trN=``
       pairs, starting at the first unused ``headN``.
    2. Try to vocalize ``1=`` against ``tr=`` via ``vocalize_param``.
    3. If that yields a false value, vocalize the page title instead (via
       ``do_vocalize_param``) and insert the result as ``1=``.
    4. Independently of ``tr=``, walk ``head2``, ``head3``, ... and vocalize
       each against the matching ``trN`` until ``vocalize_param`` returns a
       false value.

    Args:
        pagetitle: Title of the page the template is on.
        index: Page index, used only in log messages.
        template: Template object; modified in place.

    Returns:
        A list describing what was changed: parameter names that were
        vocalized, plus ``"split translit into multiple heads"`` when step 1
        applied.
    """
    changes = []

    if template.has("tr"):
        translit = getparam(template, "tr")
        if "," in translit:
            pieces = re.split(",\\s*", translit)
            # First alternate head number (2, 3, ...) that is still free.
            slot = 2
            while template.has("head" + str(slot)):
                slot += 1
            addparam(template, "tr", pieces[0])
            # New heads reuse the existing 1= only when it ends in U+064C;
            # otherwise 1= is likely vocalized for the first transliteration
            # only and would not fit the others, so the bare page title is
            # used.
            extra_head = pagetitle
            if template.has("1"):
                existing = getparam(template, "1")
                if existing.endswith(u"\u064C"):
                    extra_head = existing
            for offset, extra_tr in enumerate(pieces[1:]):
                num = str(slot + offset)
                addparam(template, "head" + num, extra_head)
                addparam(template, "tr" + num, extra_tr)
            changes.append("split translit into multiple heads")

        # Vocalize an existing 1=.
        outcome = vocalize_param(pagetitle, index, template, "1", "tr")
        if isinstance(outcome, basestring):
            changes.append("1")

        # Fall back on the page title when 1= gave nothing.
        if not outcome:
            arabic = unicode(pagetitle)
            latin = getparam(template, "tr")
            if arabic and latin:
                vocalized = do_vocalize_param(pagetitle, index, template,
                                              "page title", arabic, latin)
                if vocalized:
                    before_text = "%s" % unicode(template)
                    anchor = "2" if template.has("2") else "tr"
                    addparam(template, "1", vocalized, before=anchor)
                    changes.append("1")
                    msg("Page %s %s: Replaced %s with %s" %
                        (index, pagetitle, before_text, unicode(template)))

    # Extra heads: head2/tr2, head3/tr3, ...
    num = 2
    while True:
        head_param = "head" + str(num)
        outcome = vocalize_param(pagetitle, index, template, head_param,
                                 "tr" + str(num))
        if isinstance(outcome, basestring):
            changes.append(head_param)
        if not outcome:
            break
        num += 1
    return changes
def process_param(pagetitle, index, template, param, paramtr,
                  include_tempname_in_changelog=False):
    """Canonicalize or remove the transliteration paired with an Arabic param.

    The Latin value in ``paramtr`` is first match-canonicalized against the
    Arabic value in ``param`` (``tr_matching``).  If the result equals the
    automatic transliteration of the Arabic (``ar_translit.tr``), the manual
    transliteration is redundant and is removed.  Otherwise the
    match-canonicalized form, or failing that the self-canonicalized form
    (``ar_translit.canonicalize_latin_arabic``), replaces the manual value
    when it differs.

    Args:
        pagetitle: Title of the page, used in log messages.
        index: Page index, used in log messages.
        template: Template object; modified in place.
        param: Name of the parameter holding the Arabic text.
        paramtr: Name of the parameter holding the transliteration.
        include_tempname_in_changelog: If true, prefix the transliteration
            parameter name with the template name in the changelog entry.

    Returns:
        ``False`` if ``param`` is empty; a one-element list with a changelog
        string if the template was modified; ``True`` otherwise (including
        when the transliteration is ``-`` or empty).
    """
    def pagemsg(text):
        msg("Page %s %s: %s.%s: %s" % (index, pagetitle, template.name, param,
                                       text))

    def mutate_and_log(mutation, *args):
        # Snapshot the template, apply the mutation, log before/after.
        snapshot = "%s" % unicode(template)
        mutation(*args)
        msg("Page %s %s: Replaced %s with %s" %
            (index, pagetitle, snapshot, unicode(template)))

    arabic = getparam(template, param)
    latin = getparam(template, paramtr)
    paramtrname = paramtr
    if include_tempname_in_changelog:
        paramtrname = "%s.%s" % (template.name, paramtr)

    if not arabic:
        return False
    if latin == "-":
        pagemsg("Translit is '-', skipping")
        return True
    if not latin:
        return True

    # Canonicalize the manual translit by matching it against the Arabic.
    try:
        _, matched = tr_matching(arabic, latin, True, pagemsg)
        if not matched:
            pagemsg("Unable to match-canonicalize %s (%s)" % (arabic, latin))
    except Exception as e:
        pagemsg("Trying to match-canonicalize %s (%s): %s" % (arabic, latin, e))
        matched = None

    # Automatic transliteration of the Arabic, for the redundancy check.
    try:
        auto = ar_translit.tr(arabic)
        if not auto:
            pagemsg("Unable to auto-translit %s" % arabic)
    except Exception as e:
        pagemsg("Trying to transliterate %s: %s" % (arabic, e))
        auto = None

    if auto and matched:
        if auto != matched:
            pagemsg("Auto-translit for %s (%s) not same as manual translit %s (canonicalized %s)" %
                    (arabic, auto, latin, matched))
        else:
            pagemsg("Removing redundant translit for %s (%s)" % (arabic, latin))
            mutate_and_log(template.remove, paramtr)
            return ["remove redundant %s=%s" % (paramtrname, latin)]

    if matched:
        if latin == matched:
            return True
        pagemsg("Match-canonicalizing Latin %s to %s" % (latin, matched))
        mutate_and_log(addparam, template, paramtr, matched)
        return ["match-canon %s=%s -> %s" % (paramtrname, latin, matched)]

    # Matching failed: canonicalize the Latin on its own.
    selfcanon, _ = ar_translit.canonicalize_latin_arabic(latin, None)
    if latin == selfcanon:
        return True
    pagemsg("Self-canonicalizing Latin %s to %s" % (latin, selfcanon))
    mutate_and_log(addparam, template, paramtr, selfcanon)
    return ["self-canon %s=%s -> %s" % (paramtrname, latin, selfcanon)]
def putp(param, value):
    """Set ``param`` to ``value`` on ``headword_template`` via ``addparam``.

    NOTE(review): ``headword_template`` is not a parameter or local of this
    function; presumably it comes from an enclosing scope or is a module
    global -- confirm where it is bound.
    """
    addparam(headword_template, param, value)
def canon_head(pagetitle, index, template):
    """Canonicalize the head(s) and transliteration(s) of a headword template.

    Steps, in order:

    1. If ``tr=`` holds several transliterations separated by ``,`` or ``/``,
       keep the first in ``tr=`` and move the rest into new ``headN=`` /
       ``trN=`` pairs, starting at the first unused ``headN``.
    2. Canonicalize ``1=`` against ``tr=`` via ``canon_param``.
    3. If ``canon_param`` returned ``False``, canonicalize the page title
       against ``tr=`` (via ``do_canon_param``), inserting the result as
       ``1=`` and updating or removing ``tr=`` as indicated.
    4. Independently of ``tr=``, walk ``head2``, ``head3``, ... and
       canonicalize each against the matching ``trN`` until ``canon_param``
       returns ``False``.

    Args:
        pagetitle: Title of the page the template is on.
        index: Page index, used only in log messages.
        template: Template object; modified in place.

    Returns:
        The accumulated list of action descriptions.
    """
    log = []

    if template.has("tr"):
        translit = getparam(template, "tr")
        if "," in translit or "/" in translit:
            pieces = re.split("\\s*[,/]\\s*", translit)
            # First alternate head number (2, 3, ...) that is still free.
            slot = 2
            while template.has("head" + str(slot)):
                slot += 1
            addparam(template, "tr", pieces[0])
            # New heads reuse the existing 1= only when it ends in U+064C;
            # otherwise 1= is likely vocalized for the first transliteration
            # only and would not fit the others, so the bare page title is
            # used.
            extra_head = pagetitle
            if template.has("1"):
                existing = getparam(template, "1")
                if existing.endswith(u"\u064C"):
                    extra_head = existing
            for offset, extra_tr in enumerate(pieces[1:]):
                num = str(slot + offset)
                addparam(template, "head" + num, extra_head)
                addparam(template, "tr" + num, extra_tr)
            log.append("split translit into multiple heads")

        # Canonicalize an existing 1=.
        outcome = canon_param(pagetitle, index, template, "1", "tr")
        if outcome != False:
            log.extend(outcome)
        else:
            # No usable 1=: derive it from the page title instead.
            arabic = pagetitle
            latin = getparam(template, "tr")
            if arabic and latin:
                canonarabic, canonlatin, newactions = do_canon_param(
                    pagetitle, index, template, "page title", "1", "tr",
                    arabic, latin)
                before_text = "%s" % unicode(template)
                if canonarabic:
                    anchor = "2" if template.has("2") else "tr"
                    addparam(template, "1", canonarabic, before=anchor)
                # canonlatin == True means "drop the transliteration";
                # any other true value is the replacement transliteration.
                if canonlatin == True:
                    template.remove("tr")
                elif canonlatin:
                    addparam(template, "tr", canonlatin)
                log.extend(newactions)
                if canonarabic or canonlatin:
                    msg("Page %s %s: Replaced %s with %s" %
                        (index, pagetitle, before_text, unicode(template)))

    # Extra heads: head2/tr2, head3/tr3, ...
    num = 2
    while True:
        head_param = "head" + str(num)
        outcome = canon_param(pagetitle, index, template, head_param,
                              "tr" + str(num))
        if outcome == False:
            break
        log.extend(outcome)
        num += 1
    return log
def rewrite_one_page_idafa(page, index, text):
    """Normalize ``state=`` on ``ar-decl-*`` templates and warn about leftovers.

    Currently active behavior, for every template whose name starts with
    ``ar-decl-``:

    * ``state=ind`` becomes ``state=ind-def`` when the page contains an
      ``ar-proper noun`` template;
    * otherwise ``state=def`` together with ``basestate=ind`` becomes
      ``state=ind-def`` (``basestate=`` is removed);
    * warnings are logged if any of ``basestate``, ``modcase``, ``modstate``,
      ``modnumber``, ``modidafa`` or ``omitarticle`` is set, or if
      ``state=ind`` is still present.

    Several further rewrite phases are present but commented out; the
    counters and lists they would update are kept so the phases can be
    re-enabled.

    NOTE(review): a second function with this same name and body appears
    later in the file; if both live in one module the later definition
    replaces this one -- confirm which is intended.

    Args:
        page: Page object; only ``page.title()`` is used (for log messages).
        index: Page index, used only in log messages.
        text: Parsed page text providing ``filter_templates()``; its
            templates are modified in place.

    Returns:
        A ``(text, changelog)`` tuple; ``changelog`` is the ``"; "``-joined
        list of actions taken, or the empty string when nothing was done.
    """
    pagetitle = unicode(page.title())
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))
    # The first two counters and idafa_added are only updated by the
    # commented-out phases below, so they currently stay at 0 / empty and
    # their changelog entries are never produced.
    num_new_style = 0
    num_modhead_changed = 0
    num_state_ind_to_ind_def = 0
    num_basestate_ind_def = 0
    idafa_added = []
    # First pass: is there an ar-proper noun template anywhere on the page?
    has_proper_noun = False
    for t in text.filter_templates():
        if t.name == "ar-proper noun":
            has_proper_noun = True
    # Second pass: fix up the declension templates.
    for t in text.filter_templates():
        if t.name.startswith("ar-decl-"):
            # Only read by the commented-out phases below.
            changed = False

            # Change state=ind for proper noun to state=ind-def
            oldt = unicode(t)
            if getparam(t, "state") == "ind" and has_proper_noun:
                addparam(t, "state", "ind-def")
                pagemsg(
                    "Converting state=ind to state=ind-def for proper noun")
                pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
                num_state_ind_to_ind_def += 1
            elif getparam(t, "state") == "def" and getparam(
                    t, "basestate") == "ind":
                t.remove("basestate")
                addparam(t, "state", "ind-def")
                pagemsg("Converting state=def|basestate=ind to state=ind-def")
                pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
                num_basestate_ind_def += 1

            # Change old-style ʾidāfa (state=con) to new-style (basestate=con)
            #oldt = unicode(t)
            #if (getparam(t, "state") == "con" and getparam(t, "modcase") and
            #    not getparam(t, "basestate")):
            #  modstate = getparam(t, "modstate")
            #  addparam(t, "basestate", "con")
            #  addparam(t, "modidafa", "yes")
            #  if not modstate:
            #    t.remove("state")
            #  else:
            #    addparam(t, "state", modstate)
            #  pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
            #  changed = True

            # Remove manual ʾidāfa params when possible and substitute idafa=
            #oldt = unicode(t)
            #if getparam(t, "basestate") == "con" and getparam(t, "modcase") == "gen":
            #  idafa = ""
            #  modnumber = getparam(t, "modnumber")
            #  if not modnumber:
            #    pagemsg("WARNING: Missing modnumber= in idafa template, substituting sg: %s" %
            #        unicode(t))
            #    modnumber = "sg"
            #    addparam(t, "modnumber", "sg")
            #  modstate = getparam(t, "modstate")
            #  state = getparam(t, "state")
            #  if not modstate:
            #    if state:
            #      pagemsg("WARNING: Extraneous state= in idafa template: %s" %
            #          unicode(t))
            #    idafa = modnumber
            #  elif state != modstate:
            #    pagemsg("WARNING: modstate= in idafa template but state= doesn't match: %s"
            #        % unicode(t))
            #  else:
            #    idafa = "%s-%s" % (modstate, modnumber)
            #    t.remove("state")
            #    t.remove("modstate")
            #  if idafa:
            #    t.remove("basestate")
            #    t.remove("modcase")
            #    t.remove("modnumber")
            #    t.remove("modidafa")
            #    m = re.match("^ind-(.*)$", idafa)
            #    if m:
            #      if has_proper_noun:
            #        pagemsg("Not replacing idafa state 'ind' because proper noun: %s"
            #            % unicode(t))
            #      elif pagetitle in [u"أقدم مهنة", u"غير طبيعي"]:
            #        pagemsg("Not replacing idafa state 'ind' because it's special-cased: %s" % unicode(t))
            #      else:
            #        pagemsg("NOTE: Replacing idafa state 'ind' with no state restriction: %s"
            #            % unicode(t))
            #        idafa = m.group(1)
            #    m = re.match("^(.*?)-sg$", idafa)
            #    if m:
            #      idafa = m.group(1)
            #    if idafa == "sg":
            #      idafa = "yes"
            #    addparam(t, "idafa", idafa)
            #    pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
            #    idafa_added.append(idafa)
            #  elif changed:
            #    num_new_style += 1

            # With the phase above disabled, any of these params still set
            # is only reported, not rewritten.
            if (getparam(t, "basestate") or getparam(t, "modcase") or
                    getparam(t, "modstate") or getparam(t, "modnumber") or
                    getparam(t, "modidafa")):
                pagemsg("WARNING: idafa params remain after processing: %s" %
                        unicode(t))

            ## Change modN into modheadN
            #oldt = unicode(t)
            #changed = False
            #for i in xrange(2, 20):
            #  modn = getparam(t, "mod" + str(i))
            #  if modn:
            #    t.remove("mod" + str(i))
            #    addparam(t, "modhead" + str(i), modn)
            #    changed = True
            #if changed:
            #  pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
            #  num_modhead_changed += 1

            if getparam(t, "omitarticle"):
                pagemsg("WARNING: omitarticle present: %s" % unicode(t))
            if getparam(t, "state") == "ind":
                pagemsg("WARNING: state=ind still present: %s" % unicode(t))
    # Build the changelog from whichever phases actually did something.
    actions = []
    if idafa_added:
        actions.append(u"Replaced ʾidāfa params with idafa= param: %s" %
                       (", ".join(idafa_added)))
    if num_new_style:
        actions.append(u"Corrected %s old-style ʾidāfa param(s) to new-style" %
                       num_new_style)
    if num_modhead_changed:
        actions.append(u"Changed modN to modheadN")
    if num_state_ind_to_ind_def:
        actions.append(u"Converted state=ind to state=ind-def for proper noun")
    if num_basestate_ind_def:
        actions.append(u"Converted state=def|basestate=ind to state=ind-def")
    if actions:
        changelog = "; ".join(actions)
        pagemsg("Changelog = %s" % changelog)
        return text, changelog
    return text, ""
def rewrite_one_page_idafa(page, index, text):
    """Normalize ``state=`` on ``ar-decl-*`` templates and warn about leftovers.

    Currently active behavior, for every template whose name starts with
    ``ar-decl-``:

    * ``state=ind`` becomes ``state=ind-def`` when the page contains an
      ``ar-proper noun`` template;
    * otherwise ``state=def`` together with ``basestate=ind`` becomes
      ``state=ind-def`` (``basestate=`` is removed);
    * warnings are logged if any of ``basestate``, ``modcase``, ``modstate``,
      ``modnumber``, ``modidafa`` or ``omitarticle`` is set, or if
      ``state=ind`` is still present.

    Several further rewrite phases are present but commented out; the
    counters and lists they would update are kept so the phases can be
    re-enabled.

    NOTE(review): a function with this same name and body is also defined
    earlier in the file; if both live in one module this later definition
    is the one that takes effect -- confirm the duplicate is intended.

    Args:
        page: Page object; only ``page.title()`` is used (for log messages).
        index: Page index, used only in log messages.
        text: Parsed page text providing ``filter_templates()``; its
            templates are modified in place.

    Returns:
        A ``(text, changelog)`` tuple; ``changelog`` is the ``"; "``-joined
        list of actions taken, or the empty string when nothing was done.
    """
    pagetitle = unicode(page.title())
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))
    # The first two counters and idafa_added are only updated by the
    # commented-out phases below, so they currently stay at 0 / empty and
    # their changelog entries are never produced.
    num_new_style = 0
    num_modhead_changed = 0
    num_state_ind_to_ind_def = 0
    num_basestate_ind_def = 0
    idafa_added = []
    # First pass: is there an ar-proper noun template anywhere on the page?
    has_proper_noun = False
    for t in text.filter_templates():
        if t.name == "ar-proper noun":
            has_proper_noun = True
    # Second pass: fix up the declension templates.
    for t in text.filter_templates():
        if t.name.startswith("ar-decl-"):
            # Only read by the commented-out phases below.
            changed = False

            # Change state=ind for proper noun to state=ind-def
            oldt = unicode(t)
            if getparam(t, "state") == "ind" and has_proper_noun:
                addparam(t, "state", "ind-def")
                pagemsg("Converting state=ind to state=ind-def for proper noun")
                pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
                num_state_ind_to_ind_def += 1
            elif getparam(t, "state") == "def" and getparam(t, "basestate") == "ind":
                t.remove("basestate")
                addparam(t, "state", "ind-def")
                pagemsg("Converting state=def|basestate=ind to state=ind-def")
                pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
                num_basestate_ind_def += 1

            # Change old-style ʾidāfa (state=con) to new-style (basestate=con)
            #oldt = unicode(t)
            #if (getparam(t, "state") == "con" and getparam(t, "modcase") and
            #    not getparam(t, "basestate")):
            #  modstate = getparam(t, "modstate")
            #  addparam(t, "basestate", "con")
            #  addparam(t, "modidafa", "yes")
            #  if not modstate:
            #    t.remove("state")
            #  else:
            #    addparam(t, "state", modstate)
            #  pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
            #  changed = True

            # Remove manual ʾidāfa params when possible and substitute idafa=
            #oldt = unicode(t)
            #if getparam(t, "basestate") == "con" and getparam(t, "modcase") == "gen":
            #  idafa = ""
            #  modnumber = getparam(t, "modnumber")
            #  if not modnumber:
            #    pagemsg("WARNING: Missing modnumber= in idafa template, substituting sg: %s" %
            #        unicode(t))
            #    modnumber = "sg"
            #    addparam(t, "modnumber", "sg")
            #  modstate = getparam(t, "modstate")
            #  state = getparam(t, "state")
            #  if not modstate:
            #    if state:
            #      pagemsg("WARNING: Extraneous state= in idafa template: %s" %
            #          unicode(t))
            #    idafa = modnumber
            #  elif state != modstate:
            #    pagemsg("WARNING: modstate= in idafa template but state= doesn't match: %s"
            #        % unicode(t))
            #  else:
            #    idafa = "%s-%s" % (modstate, modnumber)
            #    t.remove("state")
            #    t.remove("modstate")
            #  if idafa:
            #    t.remove("basestate")
            #    t.remove("modcase")
            #    t.remove("modnumber")
            #    t.remove("modidafa")
            #    m = re.match("^ind-(.*)$", idafa)
            #    if m:
            #      if has_proper_noun:
            #        pagemsg("Not replacing idafa state 'ind' because proper noun: %s"
            #            % unicode(t))
            #      elif pagetitle in [u"أقدم مهنة", u"غير طبيعي"]:
            #        pagemsg("Not replacing idafa state 'ind' because it's special-cased: %s" % unicode(t))
            #      else:
            #        pagemsg("NOTE: Replacing idafa state 'ind' with no state restriction: %s"
            #            % unicode(t))
            #        idafa = m.group(1)
            #    m = re.match("^(.*?)-sg$", idafa)
            #    if m:
            #      idafa = m.group(1)
            #    if idafa == "sg":
            #      idafa = "yes"
            #    addparam(t, "idafa", idafa)
            #    pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
            #    idafa_added.append(idafa)
            #  elif changed:
            #    num_new_style += 1

            # With the phase above disabled, any of these params still set
            # is only reported, not rewritten.
            if (getparam(t, "basestate") or getparam(t, "modcase") or
                    getparam(t, "modstate") or getparam(t, "modnumber") or
                    getparam(t, "modidafa")):
                pagemsg("WARNING: idafa params remain after processing: %s" %
                        unicode(t))

            ## Change modN into modheadN
            #oldt = unicode(t)
            #changed = False
            #for i in xrange(2, 20):
            #  modn = getparam(t, "mod" + str(i))
            #  if modn:
            #    t.remove("mod" + str(i))
            #    addparam(t, "modhead" + str(i), modn)
            #    changed = True
            #if changed:
            #  pagemsg("Replacing %s with %s" % (oldt, unicode(t)))
            #  num_modhead_changed += 1

            if getparam(t, "omitarticle"):
                pagemsg("WARNING: omitarticle present: %s" % unicode(t))
            if getparam(t, "state") == "ind":
                pagemsg("WARNING: state=ind still present: %s" % unicode(t))
    # Build the changelog from whichever phases actually did something.
    actions = []
    if idafa_added:
        actions.append(u"Replaced ʾidāfa params with idafa= param: %s" % (
            ", ".join(idafa_added)))
    if num_new_style:
        actions.append(u"Corrected %s old-style ʾidāfa param(s) to new-style" %
                       num_new_style)
    if num_modhead_changed:
        actions.append(u"Changed modN to modheadN")
    if num_state_ind_to_ind_def:
        actions.append(u"Converted state=ind to state=ind-def for proper noun")
    if num_basestate_ind_def:
        actions.append(u"Converted state=def|basestate=ind to state=ind-def")
    if actions:
        changelog = "; ".join(actions)
        pagemsg("Changelog = %s" % changelog)
        return text, changelog
    return text, ""