def process_page(index, page, save, verbose):
    """Log that this script is obsolete for the given page, then return.

    The function returns right after emitting the warning, so none of the
    ``ru-conj-*`` -> ``ru-conj`` conversion code below it ever runs.  That
    code is kept only as a starting point in case the script is revived.

    index: page counter, echoed in every log line.
    page: page object; title() is read here, text and save() only in the
        unreachable part.
    save: only consulted by the unreachable part (save vs. dry-run report).
    verbose: only consulted by the unreachable part (log old/new text).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ------------------------------------------------------------------
    # Unreachable from here on (see docstring).
    # ------------------------------------------------------------------
    pagemsg("Processing")

    old_text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []

    # Positional param number that holds the variant spec, keyed by the old
    # conjugation type.  Types not listed have no variant param.
    variant_argno_table = (
        (3, ("3oa", "4a", "4b", "4c", "6a", "6c", "11a", "16a", "16b",
             u"irreg-дать", u"irreg-клясть", u"irreg-быть")),
        (4, ("5a", "5b", "5c", "6b", "9a", "9b", "11b", "14a", "14b",
             "14c")),
        (5, ("7b",)),
        (6, ("7a",)),
    )

    for tmpl in parsed.filter_templates():
        before = unicode(tmpl)
        tname = unicode(tmpl.name)
        if tname.startswith("ru-conj-") and tname != "ru-conj-verb-see":
            m = re.search("^ru-conj-(.*)$", tname)
            tmpl.name = "ru-conj"
            conjtype = m.group(1)

            varargno = None
            for argno, conjtypes in variant_argno_table:
                if conjtype in conjtypes:
                    varargno = argno
                    break

            variant = None
            if varargno:
                variant_key = str(varargno)
                variant = getparam(tmpl, variant_key)
                if re.search("^[abc]", variant):
                    variant = "/" + variant
                # Blank the variant slot instead of deleting it when any of
                # the next three positional params is set.
                later_param_set = any(
                    getparam(tmpl, str(varargno + offset))
                    for offset in (1, 2, 3))
                if later_param_set:
                    tmpl.add(variant_key, "")
                else:
                    rmparam(tmpl, variant_key)
                conjtype += variant

            suffix = " (and move variant spec)" if variant else ""
            notes.append(
                "ru-conj-* -> ru-conj, moving params up by one%s" % suffix)

            # Walk from the highest slot down so that each value is copied
            # to slot+1 before its own slot is overwritten; start copying at
            # the highest non-empty slot.
            seen_value = False
            for pos in xrange(20, 0, -1):
                value = getparam(tmpl, str(pos))
                if value:
                    seen_value = True
                if seen_value:
                    tmpl.add(str(pos + 1), value)
            tmpl.add("1", conjtype)
            blib.sort_params(tmpl)

        after = unicode(tmpl)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    new_text = unicode(parsed)
    if new_text == old_text:
        return

    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (old_text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
        pagemsg("Would save with comment = %s" % comment)
        return
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
def process_page(page, index, parsed):
    """Log that this script is obsolete for the given page, then return.

    The function returns None right after emitting the warning, so none of
    the ``ru-conj-*`` -> ``ru-conj`` conversion code below it ever runs.
    That code is kept only as a starting point in case the script is
    revived; if reached, it would return ``(new_page_text, notes)``.

    page: page object; title() is read here, text only in the unreachable
        part.
    index: page counter, echoed in every log line.
    parsed: not read before the early return; the unreachable part rebinds
        it to a fresh ``blib.parse(page)`` result.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ------------------------------------------------------------------
    # Unreachable from here on (see docstring).
    # ------------------------------------------------------------------
    pagemsg("Processing")

    # Not used further down in this version.
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []

    # Positional param number that holds the variant spec, keyed by the old
    # conjugation type.  Types not listed have no variant param.
    variant_argno_table = (
        (3, ("3oa", "4a", "4b", "4c", "6a", "6c", "11a", "16a", "16b",
             u"irreg-дать", u"irreg-клясть", u"irreg-быть")),
        (4, ("5a", "5b", "5c", "6b", "9a", "9b", "11b", "14a", "14b",
             "14c")),
        (5, ("7b",)),
        (6, ("7a",)),
    )

    for tmpl in parsed.filter_templates():
        before = unicode(tmpl)
        tname = unicode(tmpl.name)
        if tname.startswith("ru-conj-") and tname != "ru-conj-verb-see":
            m = re.search("^ru-conj-(.*)$", tname)
            tmpl.name = "ru-conj"
            conjtype = m.group(1)

            varargno = None
            for argno, conjtypes in variant_argno_table:
                if conjtype in conjtypes:
                    varargno = argno
                    break

            variant = None
            if varargno:
                variant_key = str(varargno)
                variant = getparam(tmpl, variant_key)
                if re.search("^[abc]", variant):
                    variant = "/" + variant
                # Blank the variant slot instead of deleting it when any of
                # the next three positional params is set.
                later_param_set = any(
                    getparam(tmpl, str(varargno + offset))
                    for offset in (1, 2, 3))
                if later_param_set:
                    tmpl.add(variant_key, "")
                else:
                    rmparam(tmpl, variant_key)
                conjtype += variant

            suffix = " (and move variant spec)" if variant else ""
            notes.append(
                "ru-conj-* -> ru-conj, moving params up by one%s" % suffix)

            # Walk from the highest slot down so that each value is copied
            # to slot+1 before its own slot is overwritten; start copying at
            # the highest non-empty slot.
            seen_value = False
            for pos in xrange(20, 0, -1):
                value = getparam(tmpl, str(pos))
                if value:
                    seen_value = True
                if seen_value:
                    tmpl.add(str(pos + 1), value)
            tmpl.add("1", conjtype)
            blib.sort_params(tmpl)

        after = unicode(tmpl)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))

    return unicode(parsed), notes
def process_page(index, page, direc, save, verbose): pagetitle = unicode(page.title()) def pagemsg(txt): msg("Page %s %s: %s" % (index, pagetitle, txt)) pagemsg("WARNING: Script no longer applies and would need fixing up") return pagemsg("Processing") text = unicode(page.text) parsed = blib.parse(page) notes = [] origdirec = direc for t in parsed.filter_templates(): origt = unicode(t) direc = origdirec if unicode(t.name) in ["ru-conj-7b"]: rmparam(t, "past_m") rmparam(t, "past_f") rmparam(t, "past_n") rmparam(t, "past_pl") rmparam(t, "notes") rmparam(t, "past_adv_part") rmparam(t, "past_adv_part2") rmparam(t, "past_adv_part_short") #ppps = blib.fetch_param_chain(t, "past_pasv_part", "past_pasv_part") #blib.remove_param_chain(t, "past_pasv_part", "past_pasv_part") presstem = getparam(t, "3") rmparam(t, "5") rmparam(t, "4") rmparam(t, "3") npp = "npp" in direc direc = direc.replace("npp", "") yo = u"ё" in direc direc = direc.replace(u"ё", "") direc = re.sub("7b/?", "", direc) if re.search(u"е́?[^аэыоуяеиёю]*$", presstem): if not yo: pagemsg(u"Something wrong, е-stem present and no ё directive") if npp: presstem = ru.make_ending_stressed(presstem) else: presstem = re.sub(u"е́?([^аэыоуяеиёю]*)$", ur"ё\1", presstem) else: presstem = ru.make_ending_stressed(presstem) pap = getparam(t, "past_actv_part") pred_pap = presstem + u"ший" if direc not in ["b", "b(9)"] and re.search(u"[дт]$", presstem): pred_pap = re.sub(u"[дт]$", "", presstem) + u"вший" if pap: if pap == pred_pap: pagemsg("Removing past_actv_part=%s because same as predicted" % pap) rmparam(t, "past_actv_part") else: pagemsg("Not removing unpredictable past_actv_part=%s (predicted %s)" % (pap, pred_pap)) for param in t.params: if not re.search("^([0-9]+$|past_pasv_part)", unicode(param.name)): pagemsg("Found additional named param %s" % unicode(param)) t.add("3", presstem) if direc: t.add("4", "") t.add("5", direc) blib.sort_params(t) #blib.set_param_chain(t, ppps, "past_pasv_part", "past_pasv_part") 
notes.append("set class-7b verb to directive %s%s" % (direc, npp and u" (no ё in present stem)" or "")) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, direc): pagetitle = unicode(page.title()) def pagemsg(txt): msg("Page %s %s: %s" % (index, pagetitle, txt)) pagemsg("WARNING: Script no longer applies and would need fixing up") return pagemsg("Processing") text = unicode(page.text) parsed = blib.parse(page) notes = [] origdirec = direc for t in parsed.filter_templates(): origt = unicode(t) direc = origdirec if unicode(t.name) in ["ru-conj-7b"]: rmparam(t, "past_m") rmparam(t, "past_f") rmparam(t, "past_n") rmparam(t, "past_pl") rmparam(t, "notes") rmparam(t, "past_adv_part") rmparam(t, "past_adv_part2") rmparam(t, "past_adv_part_short") #ppps = blib.fetch_param_chain(t, "past_pasv_part", "past_pasv_part") #blib.remove_param_chain(t, "past_pasv_part", "past_pasv_part") presstem = getparam(t, "3") rmparam(t, "5") rmparam(t, "4") rmparam(t, "3") npp = "npp" in direc direc = direc.replace("npp", "") yo = u"ё" in direc direc = direc.replace(u"ё", "") direc = re.sub("7b/?", "", direc) if re.search(u"е́?[^аэыоуяеиёю]*$", presstem): if not yo: pagemsg(u"Something wrong, е-stem present and no ё directive") if npp: presstem = rulib.make_ending_stressed_ru(presstem) else: presstem = re.sub(u"е́?([^аэыоуяеиёю]*)$", ur"ё\1", presstem) else: presstem = rulib.make_ending_stressed_ru(presstem) pap = getparam(t, "past_actv_part") pred_pap = presstem + u"ший" if direc not in ["b", "b(9)"] and re.search(u"[дт]$", presstem): pred_pap = re.sub(u"[дт]$", "", presstem) + u"вший" if pap: if pap == pred_pap: pagemsg("Removing past_actv_part=%s because same as predicted" % pap) rmparam(t, "past_actv_part") else: pagemsg("Not removing unpredictable past_actv_part=%s (predicted %s)" % (pap, pred_pap)) for param in t.params: if not re.search("^([0-9]+$|past_pasv_part)", unicode(param.name)): pagemsg("Found additional named param %s" % unicode(param)) t.add("3", presstem) if direc: t.add("4", "") t.add("5", direc) blib.sort_params(t) #blib.set_param_chain(t, ppps, "past_pasv_part", "past_pasv_part") 
notes.append("set class-7b verb to directive %s%s" % (direc, npp and u" (no ё in present stem)" or "")) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return unicode(parsed), notes