def process_page(index, page, save, verbose):
    """Log manually specified past passive participles of perfective verbs.

    For every ``ru-conj`` / ``ru-conj-old`` template on the page whose
    parameter 1 starts with ``pf``, expand the corresponding
    ``ru-generate-verb-forms`` call (skipping the template if the expansion
    fails) and log each non-empty, non-``-`` value of the params
    ``past_pasv_part``..``past_pasv_part9`` and ``ppp``..``ppp9``.

    Args:
        index: Page index, used only as a prefix in log messages.
        page: Page object; must provide ``title()`` and be parseable by
            ``blib.parse``.
        save: Unused here; this function only reports and never edits.
        verbose: Passed through to ``blib.expand_text``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        # Bind the template name once. The original code tested `tname`
        # below without ever assigning it, which raised NameError on the
        # first matching template.
        tname = unicode(t.name)
        if tname not in ["ru-conj", "ru-conj-old"]:
            continue
        if not getparam(t, "1").startswith("pf"):
            continue

        if tname == "ru-conj":
            tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                              unicode(t))
        else:
            tempcall = re.sub(r"\{\{ru-conj-old",
                              "{{ru-generate-verb-forms|old=y", unicode(t))
        result = expand_text(tempcall)
        if not result:
            pagemsg("WARNING: Error generating forms, skipping")
            continue
        # The parsed result is not consulted below; the call is kept so that
        # a malformed expansion still fails here as it did before.
        args = rulib.split_generate_args(result)

        for base in ["past_pasv_part", "ppp"]:
            for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
                val = getparam(t, base + i)
                if val and val != "-":
                    # Drop a trailing "//..." suffix before reporting.
                    val = re.sub("//.*", "", val)
                    pagemsg("Found perfective past passive participle: %s"
                            % val)
def process_page(index, page, save, verbose):
    """Replace manually listed PPP params with an auto-generating variant.

    For each ``ru-conj`` template that carries manual past-passive-participle
    params, try appending the variant codes ``+p`` (and, depending on the
    first manual form, ``+pё`` / ``+pжд``) to param 2 on a scratch copy of
    the template.  If the forms generated for a variant equal the manual
    ones, the manual params are removed from the real template and the
    variant is added to its param 2.  The page is saved only when ``save``
    is true; otherwise the would-be save is logged.

    Args:
        index: Page index, used only as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: Whether to actually write the modified text back.
        verbose: Enables the full before/after text log and is passed to
            ``blib.expand_text``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    manual_ppp_forms = [
        "past_pasv_part", "past_pasv_part2", "past_pasv_part3",
        "past_pasv_part4", "ppp", "ppp2", "ppp3", "ppp4",
    ]
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []

    for t in parsed.filter_templates():
        origt = unicode(t)
        if unicode(t.name) == "ru-conj":
            # Collect the manual PPP values, ignoring blanks and "-".
            explicit = [getparam(t, slot) for slot in manual_ppp_forms]
            manual_ppps = [v for v in explicit if v and v != "-"]
            if not manual_ppps:
                continue
            if any(unicode(p.value) == "or" for p in t.params):
                pagemsg("WARNING: Skipping multi-arg conjugation: %s"
                        % unicode(t))
                continue
            curvariant = getparam(t, "2")
            if any(marker in curvariant for marker in ("+p", "(7)", "(8)")):
                pagemsg("WARNING: Found both manual PPP and PPP variant, something wrong: %s"
                        % unicode(t))
                continue

            # Scratch copy of the template with the manual PPP params removed.
            probe = blib.parse_text(unicode(t)).filter_templates()[0]
            for slot in manual_ppp_forms:
                rmparam(probe, slot)

            first_ppp = manual_ppps[0]
            candidates = ["+p"]
            if u"ё" in re.sub(u"ённый$", "", first_ppp):
                candidates.append(u"+pё")
            if u"жденный" in first_ppp or u"ждённый" in first_ppp:
                candidates.append(u"+pжд")

            mismatches = []
            for variant in candidates:
                probe.add("2", curvariant + variant)
                tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                                  unicode(probe))
                result = expand_text(tempcall)
                if not result:
                    pagemsg("WARNING: Error generating forms, skipping")
                    continue
                args = rulib.split_generate_args(result)
                if "past_pasv_part" not in args:
                    pagemsg("WARNING: Something wrong, no past passive participle generated: %s"
                            % unicode(t))
                    continue
                auto_ppps = [
                    piece
                    for slot in manual_ppp_forms if slot in args
                    for piece in re.split(",", args[slot])
                    if piece and piece != "-"
                ]
                if manual_ppps != auto_ppps:
                    mismatches.append(
                        "WARNING: Manual PPP's %s not same as auto-generated PPP's %s: %s"
                        % (",".join(manual_ppps), ",".join(auto_ppps),
                           unicode(t)))
                else:
                    pagemsg("Manual PPP's %s same as auto-generated PPP's, switching to auto"
                            % ",".join(manual_ppps))
                    for slot in manual_ppp_forms:
                        rmparam(t, slot)
                    t.add("2", curvariant + variant)
                    notes.append("replaced manual PPP's with variant %s"
                                 % variant)
                    break
            else:
                # No variant matched: only now report the collected mismatches.
                for warning in mismatches:
                    pagemsg(warning)

        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose):
    """Turn the page's ``ru-decl-noun-see`` into a full ``ru-noun-table``.

    The page must contain exactly one headword template (``ru-noun+`` or
    ``ru-proper noun+``) and exactly one ``ru-decl-noun-see`` template;
    otherwise a warning is logged and nothing is changed.  The see-template
    is emptied, filled with the headword's params and renamed to
    ``ru-noun-table``; for proper nouns the ``n`` param is then adjusted.
    The page is saved only when ``save`` is true.

    Args:
        index: Page index, used only as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        save: Whether to actually write the modified text back.
        verbose: Passed through to ``blib.expand_text``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    parsed = blib.parse(page)

    headword_template = None
    see_template = None
    for t in parsed.filter_templates():
        template_name = unicode(t.name)
        if template_name in ["ru-noun+", "ru-proper noun+"]:
            if headword_template:
                pagemsg("WARNING: Multiple headword templates, skipping")
                return
            headword_template = t
        if template_name == "ru-decl-noun-see":
            if see_template:
                pagemsg("WARNING: Multiple ru-decl-noun-see templates, skipping")
                return
            see_template = t

    if not headword_template:
        pagemsg("WARNING: No ru-noun+ or ru-proper noun+ templates, skipping")
        return
    if not see_template:
        pagemsg("WARNING: No ru-decl-noun-see templates, skipping")
        return

    # Rebuild the see-template as a copy of the headword's params.
    del see_template.params[:]
    for hw_param in headword_template.params:
        see_template.add(hw_param.name, hw_param.value)
    see_template.name = "ru-noun-table"

    if unicode(headword_template.name) == "ru-proper noun+":
        # Proper nouns need care: the headword defaults to n=sg while
        # ru-noun-table defaults to n=both, so both templates are expanded
        # and the resulting value of n is compared.
        #
        # Step 1: generate args for the proper-noun headword, passing
        # |ndef=sg because ru-proper noun+ defaults to sg and
        # ru-generate-noun-args would otherwise default to both.
        hw_call = re.sub(r"^\{\{ru-proper noun\+", "{{ru-generate-noun-args",
                         unicode(headword_template))
        hw_call = re.sub(r"\}\}$", "|ndef=sg}}", hw_call)
        hw_expansion = expand_text(hw_call)
        if not hw_expansion:
            pagemsg("WARNING: Error generating ru-proper noun+ args")
            return None

        # Step 2: read the effective value of n.
        headword_n = ru.split_generate_args(hw_expansion)["n"]

        if headword_n == "s":
            # Step 3: singular must always be spelled out in ru-noun-table.
            see_template.add("n", "sg")
        elif headword_n != "p":
            # Step 5: n=both must have been explicit in the headword, but it
            # is the ru-noun-table default unless the lemma is plural.  Drop
            # n, regenerate, and restore n=both only if the default differs.
            assert headword_n == "b"
            rmparam(see_template, "n")
            see_call = re.sub(r"^\{\{ru-noun-table",
                              "{{ru-generate-noun-args",
                              unicode(see_template))
            see_expansion = expand_text(see_call)
            if not see_expansion:
                pagemsg("WARNING: Error generating ru-noun-table args")
                return None
            if ru.split_generate_args(see_expansion)["n"] != "b":
                see_template.add("n", "both")
        # Step 4 (headword_n == "p"): nothing to do.  Both templates default
        # to plural only if the lemma is plural; otherwise n=pl has to be set
        # for both, and it was already copied over with the other params.

    comment = ("Replace ru-decl-noun-see with ru-noun-table, taken from headword template (%s)"
               % unicode(headword_template.name))
    if not save:
        pagemsg("Would save with comment = %s" % comment)
    else:
        pagemsg("Saving with comment = %s" % comment)
        page.text = unicode(parsed)
        page.save(comment=comment)
def process_page_section(index, page, section, verbose):
    """Reconcile a section's ``ru-noun+`` headword with its ``ru-noun-table``.

    Parses ``section``, locates the single ``ru-noun-table`` (and optional
    ``ru-noun-old``) declension template and the single ``ru-noun+`` /
    ``ru-proper noun+`` headword template, strips ``g``/``m``/``f`` params
    from the declension, and then either copies a linked headword into the
    declension or overwrites the headword with the declension's params.

    Args:
        index: Page index, used only as a prefix in log messages.
        page: Page object; must provide ``title()`` and ``exists()``.
        section: Wikitext of the section to process.
        verbose: Enables extra logging and is passed to ``blib.expand_text``.

    Returns:
        ``None`` when the section is skipped because of a problem, otherwise
        the 5-tuple ``(new_section_text, ru_noun_table_cleaned,
        ru_noun_table_link_copied, ru_noun_changed, ru_proper_noun_changed)``
        where the last four items are 0/1 flags.
    """
    pagetitle = unicode(page.title())
    # Page title with everything up to the last colon removed; only the
    # commented-out code near the end of this function refers to it.
    subpagetitle = re.sub("^.*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    if not page.exists():
        pagemsg("WARNING: Page doesn't exist, skipping")
        return None

    parsed = blib.parse_text(section)

    noun_table_templates = []
    noun_old_templates = []

    # A section that still uses ru-decl-noun-see is not handled here.
    for t in parsed.filter_templates():
        if unicode(t.name) == "ru-decl-noun-see":
            pagemsg("Found ru-decl-noun-see, skipping")
            return None

    for t in parsed.filter_templates():
        if unicode(t.name) == "ru-noun-table":
            noun_table_templates.append(t)
        if unicode(t.name) == "ru-noun-old":
            noun_old_templates.append(t)

    if len(noun_table_templates) > 1:
        pagemsg("WARNING: Found multiple ru-noun-table templates, skipping")
        return None
    if len(noun_old_templates) > 1:
        pagemsg("WARNING: Found multiple ru-noun-old templates, skipping")
        return None
    if len(noun_table_templates) < 1:
        if noun_old_templates:
            pagemsg("WARNING: No ru-noun-table templates but found ru-noun-old template(s): %s" %
                ", ".join(unicode(x) for x in noun_old_templates))
        # Nothing to do: return the section unchanged with all flags zero.
        return unicode(parsed), 0, 0, 0, 0

    # Old-style (non-"+") headwords are left to a different conversion.
    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun", "ru-proper noun"]:
            pagemsg("Found ru-noun or ru-proper noun, skipping")
            return None

    headword_templates = []

    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun+", "ru-proper noun+"]:
            headword_templates.append(t)

    if len(headword_templates) > 1:
        pagemsg("WARNING: Found multiple headword templates, skipping")
        return None
    if len(headword_templates) < 1:
        return unicode(parsed), 0, 0, 0, 0

    noun_table_template = noun_table_templates[0]
    noun_old_template = noun_old_templates[0] if len(noun_old_templates) == 1 else None
    headword_template = headword_templates[0]
    # NOTE(review): decl_templates is not read again in this function.
    decl_templates = [x for x in [noun_table_template, noun_old_template] if x]

    if verbose:
        pagemsg("Found headword template: %s" % unicode(headword_template))
        pagemsg("Found decl template: %s" % unicode(noun_table_template))
        if noun_old_template:
            pagemsg("Found old decl template: %s" % unicode(noun_old_template))

    # Snapshots used at the end to detect and report changes.
    orig_headword_template = unicode(headword_template)
    orig_noun_table_template = unicode(noun_table_template)

    # Headword-only params (g/m/f chains and notrcat) are saved so they can
    # be restored if the headword is rebuilt from the declension below.
    genders = blib.fetch_param_chain(headword_template, "g", "g")
    masculines = blib.fetch_param_chain(headword_template, "m", "m")
    feminines = blib.fetch_param_chain(headword_template, "f", "f")
    notrcat = getparam(headword_template, "notrcat")
    filtered_headword_params = []
    for param in headword_template.params:
        name = unicode(param.name)
        if re.search("^[gmf][0-9]*$", name) or name == "notrcat":
            pass
        else:
            filtered_headword_params.append((param.name, param.value))
    # Copy of the headword without those headword-only params, used for the
    # comparison against the declension.
    filtered_headword_template = blib.parse_text("{{ru-noun+}}").filter_templates()[0]
    for name, value in filtered_headword_params:
        filtered_headword_template.add(name, value)

    ru_noun_table_cleaned = 0
    ru_noun_table_link_copied = 0
    ru_noun_changed = 0
    ru_proper_noun_changed = 0

    # Rebuild the declension template without any g=/m=/f= style params.
    new_decl_params = []
    for param in noun_table_template.params:
        name = unicode(param.name)
        if re.search("^[gmf][0-9]*$", name):
            pagemsg("WARNING: Found g=, m= or f= in noun-table, removing: %s" %
                unicode(noun_table_template))
        else:
            new_decl_params.append((param.name, param.value))
    del noun_table_template.params[:]
    for name, value in new_decl_params:
        noun_table_template.add(name, value)
    if orig_noun_table_template != unicode(noun_table_template):
        ru_noun_table_cleaned = 1

    # Separate copy of the declension; the n= adjustments below are made on
    # this copy, which later becomes the proposed headword.
    modified_noun_table_template = blib.parse_text("{{ru-noun-table}}").filter_templates()[0]
    for param in noun_table_template.params:
        modified_noun_table_template.add(param.name, param.value)

    # If proper noun and n is both then we need to add n=both because
    # proper noun+ defaults to n=sg
    if unicode(headword_template.name) == "ru-proper noun+":
        generate_template = re.sub(r"^\{\{ru-noun-table", "{{ru-generate-noun-args",
            unicode(noun_table_template))
        generate_result = expand_text(generate_template)
        if not generate_result:
            pagemsg("WARNING: Error generating noun args, skipping")
            return None
        args = ru.split_generate_args(generate_result)

        # If proper noun and n is both then we need to add n=both because
        # proper noun+ defaults to n=sg
        if args["n"] == "b" and not getparam(modified_noun_table_template, "n"):
            pagemsg("Adding n=both to headword template")
            modified_noun_table_template.add("n", "both")
        # Correspondingly, if n is sg then we can usually remove n=sg;
        # but we need to check that the number is actually sg with n=sg
        # removed because of the possibility of plurale tantum lemmas
        if args["n"] == "s":
            generate_template_with_ndef = generate_template.replace("}}", "|ndef=sg}}")
            generate_template_with_ndef = re.sub(r"\|n=s[^=|{}]*", "",
                generate_template_with_ndef)
            generate_result = expand_text(generate_template_with_ndef)
            if not generate_result:
                pagemsg("WARNING: Error generating noun args, skipping")
                return None
            ndef_args = ru.split_generate_args(generate_result)
            if ndef_args["n"] == "s":
                existing_n = getparam(headword_template, "n")
                if existing_n and not re.search(r"^s", existing_n):
                    pagemsg("WARNING: Something wrong: Found n=%s, not singular" % existing_n)
                # NOTE(review): unlike the warning above suggests, n= is
                # removed here even when the warning fired — confirm intent.
                pagemsg("Removing n=sg from headword template")
                rmparam(modified_noun_table_template, "n")
            else:
                pagemsg("WARNING: Unable to remove n= from headword template because n=%s" %
                    ndef_args["n"])

    # What the headword would look like if rebuilt from the declension.
    new_headword_template = re.sub(r"^\{\{ru-noun-table", "{{ru-noun+",
        unicode(modified_noun_table_template))
    existing_filtered_headword_template = unicode(filtered_headword_template)
    change_existing_headword = False
    if existing_filtered_headword_template != new_headword_template:
        if "[" in existing_filtered_headword_template and "[" not in new_headword_template:
            # The headword has links and the declension does not: only
            # proceed if they are identical once links are removed.
            if blib.remove_links(existing_filtered_headword_template) == new_headword_template:
                pagemsg("Headword has links but decl doesn't and they're otherwise the same, copying headword to decl")
                del noun_table_template.params[:]
                for param in filtered_headword_template.params:
                    noun_table_template.add(param.name, param.value)
                ru_noun_table_link_copied = 1
                ru_noun_table_cleaned = 0
            else:
                pagemsg("WARNING: Existing headword template %s would be overwritten with %s but links would be erased, not doing it, check manually"
                    % (existing_filtered_headword_template, new_headword_template))
                return None
        else:
            pagemsg("WARNING: Existing headword template %s will be overwritten with %s"
                % (existing_filtered_headword_template, new_headword_template))
            change_existing_headword = True

    # NOTE(review): `lemmas` is not defined in this function; presumably a
    # module-level collection of page titles to restrict to — verify.
    if change_existing_headword and (not lemmas or pagetitle in lemmas):
        del headword_template.params[:]
        for param in modified_noun_table_template.params:
            headword_template.add(param.name, param.value)
        # Restore the headword-only params saved earlier.
        blib.set_param_chain(headword_template, genders, "g", "g")
        blib.set_param_chain(headword_template, masculines, "m", "m")
        blib.set_param_chain(headword_template, feminines, "f", "f")
        if notrcat:
            headword_template.add("notrcat", notrcat)

    #genders = runoun.check_old_noun_headword_forms(headword_template, args,
    #    subpagetitle, pagemsg)
    #if genders == None:
    #  return None

    #new_params = []
    #for param in noun_table_template.params:
    #  new_params.append((param.name, param.value))

    #params_to_preserve = runoun.fix_old_headword_params(headword_template,
    #    new_params, genders, pagemsg)
    #if params_to_preserve == None:
    #  return None

    new_noun_table_template = unicode(noun_table_template)
    if new_noun_table_template != orig_noun_table_template:
        pagemsg("Replacing noun table %s with %s" % (orig_noun_table_template,
            new_noun_table_template))

    new_headword_template = unicode(headword_template)
    if new_headword_template != orig_headword_template:
        pagemsg("Replacing headword %s with %s" % (orig_headword_template,
            new_headword_template))
        if unicode(headword_template.name) == "ru-noun+":
            ru_noun_changed = 1
        else:
            ru_proper_noun_changed = 1

    return unicode(parsed), ru_noun_table_cleaned, ru_noun_table_link_copied, ru_noun_changed, ru_proper_noun_changed
def process_page_section(index, page, section, verbose):
    """Convert a section's old ``ru-noun`` headword to ``ru-noun+``.

    Parses ``section``, locates the single ``ru-noun-table`` (and optional
    ``ru-noun-old``) declension template and the single ``ru-noun`` /
    ``ru-proper noun`` headword, transfers a manual transliteration from the
    headword to the declension lemma, rewrites ``a=bi``-style animacy in the
    declension as ``ia``/``ai``, and finally rewrites and renames the
    headword to its ``+`` form.

    Args:
        index: Page index, used only as a prefix in log messages.
        page: Page object; must provide ``title()`` and ``exists()``.
        section: Wikitext of the section to process.
        verbose: Enables extra logging and is passed to ``blib.expand_text``.

    Returns:
        ``None`` when the section is skipped because of a problem, otherwise
        the 5-tuple ``(new_section_text, ru_noun_changed,
        ru_proper_noun_changed, bian_replaced, frobbed_manual_translit)``.
        The first three counters are 0/1 flags; the last item is a list that
        is either empty or holds the transferred headword transliteration.
    """
    pagetitle = unicode(page.title())
    # Page title with everything up to the last colon removed.
    subpagetitle = re.sub("^.*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    if not page.exists():
        pagemsg("WARNING: Page doesn't exist, skipping")
        return None

    parsed = blib.parse_text(section)

    noun_table_templates = []
    noun_old_templates = []

    # A section that still uses ru-decl-noun-see is not handled here.
    for t in parsed.filter_templates():
        if unicode(t.name) == "ru-decl-noun-see":
            pagemsg("Found ru-decl-noun-see, skipping")
            return None

    for t in parsed.filter_templates():
        if unicode(t.name) == "ru-noun-table":
            noun_table_templates.append(t)
        if unicode(t.name) == "ru-noun-old":
            noun_old_templates.append(t)

    if len(noun_table_templates) > 1:
        pagemsg("WARNING: Found multiple ru-noun-table templates, skipping")
        return None
    if len(noun_old_templates) > 1:
        pagemsg("WARNING: Found multiple ru-noun-old templates, skipping")
        return None
    if len(noun_table_templates) < 1:
        if noun_old_templates:
            pagemsg("WARNING: No ru-noun-table templates but found ru-noun-old template(s): %s" %
                ", ".join(unicode(x) for x in noun_old_templates))
        # Nothing to do: return the section unchanged with empty results.
        return unicode(parsed), 0, 0, 0, []

    # Sections already using the "+" headwords need no conversion.
    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun+", "ru-proper noun+"]:
            pagemsg("Found ru-noun+ or ru-proper noun+, skipping")
            return None

    headword_templates = []

    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun", "ru-proper noun"]:
            headword_templates.append(t)

    if len(headword_templates) > 1:
        pagemsg("WARNING: Found multiple headword templates, skipping")
        return None
    if len(headword_templates) < 1:
        return unicode(parsed), 0, 0, 0, []

    noun_table_template = noun_table_templates[0]
    noun_old_template = noun_old_templates[0] if len(noun_old_templates) == 1 else None
    headword_template = headword_templates[0]
    frobbed_manual_translit = []
    # The declension templates actually present (new-style first).
    decl_templates = [x for x in [noun_table_template, noun_old_template] if x]

    if verbose:
        pagemsg("Found headword template: %s" % unicode(headword_template))
        pagemsg("Found decl template: %s" % unicode(noun_table_template))
        if noun_old_template:
            pagemsg("Found old decl template: %s" % unicode(noun_old_template))

    # Retrieve headword translit and maybe transfer to decl
    headword_tr = getparam(headword_template, "tr")
    if headword_tr:
        if verbose:
            pagemsg("Found headword manual translit tr=%s" % headword_tr)
        if "," in headword_tr:
            pagemsg("WARNING: Comma in headword manual translit, skipping: %s" %
                headword_tr)
            return None
        # Punt if multi-arg-set, can't handle yet
        # NOTE(review): the nesting of the statements below inside this loop
        # was reconstructed from whitespace-collapsed source — confirm that
        # the translit transfer is meant to run once per decl template.
        for decl_template in decl_templates:
            for param in decl_template.params:
                # Only positional (keyless) params are inspected.
                if not param.showkey:
                    val = unicode(param.value)
                    if val == "or":
                        pagemsg("WARNING: Manual translit and multi-decl templates, can't handle, skipping: %s" % unicode(decl_template))
                        return None
                    if val == "-" or val == "_" or val.startswith("join:"):
                        pagemsg("WARNING: Manual translit and multi-word templates, can't handle, skipping: %s" % unicode(decl_template))
                        return None
            for i in xrange(2, 10):
                if getparam(headword_template, "tr%s" % i):
                    pagemsg("WARNING: Headword template has translit param tr%s, can't handle, skipping: %s" % (
                        i, unicode(headword_template)))
                    return None
            # The lemma is param 2 when param 1 holds a stress pattern,
            # otherwise param 1.
            if runoun.arg1_is_stress(getparam(decl_template, "1")):
                lemma_arg = "2"
            else:
                lemma_arg = "1"
            lemmaval = getparam(decl_template, lemma_arg)
            if not lemmaval:
                lemmaval = subpagetitle
            if "//" in lemmaval:
                # The lemma already has a "//translit" suffix; it must agree
                # with the headword's translit.
                m = re.search("^(.*?)//(.*)$", lemmaval)
                if m.group(2) != headword_tr:
                    pagemsg("WARNING: Found existing manual translit in decl template %s, but doesn't match headword translit %s; skipping" % (
                        lemmaval, headword_tr))
                    return None
                else:
                    pagemsg("Already found manual translit in decl template %s" %
                        lemmaval)
            else:
                lemmaval += "//" + headword_tr
                orig_decl_template = unicode(decl_template)
                decl_template.add(lemma_arg, lemmaval)
                pagemsg("Replacing decl %s with %s" %
                    (orig_decl_template, unicode(decl_template)))
                frobbed_manual_translit = [headword_tr]

    genders = blib.fetch_param_chain(headword_template, "2", "g")

    bian_replaced = 0

    # Change a=bi in decl to a=ia or a=ai, depending on order of anim/inan in
    # headword template
    for decl_template in decl_templates:
        if getparam(decl_template, "a") in ["b", "bi", "bian", "both"]:
            # Index of the first gender spec containing "in" / "an" as a
            # whole word, or -1 if none was seen.
            saw_in = -1
            saw_an = -1
            for i,g in enumerate(genders):
                if re.search(r"\bin\b", g) and saw_in < 0:
                    saw_in = i
                if re.search(r"\ban\b", g) and saw_an < 0:
                    saw_an = i
            if saw_in >= 0 and saw_an >= 0:
                orig_decl_template = unicode(decl_template)
                if saw_in < saw_an:
                    pagemsg("Replacing a=bi with a=ia in decl template")
                    decl_template.add("a", "ia")
                    bian_replaced = 1
                else:
                    pagemsg("Replacing a=bi with a=ai in decl template")
                    decl_template.add("a", "ai")
                    bian_replaced = 1
                pagemsg("Replacing decl %s with %s" %
                    (orig_decl_template, unicode(decl_template)))

    generate_template = re.sub(r"^\{\{ru-noun-table", "{{ru-generate-noun-args",
        unicode(noun_table_template))
    generate_result = expand_text(generate_template)
    if not generate_result:
        pagemsg("WARNING: Error generating noun args, skipping")
        return None
    args = ru.split_generate_args(generate_result)

    # `genders` is rebound here to the result of the headword check; a None
    # result means the section is skipped.
    genders = runoun.check_old_noun_headword_forms(headword_template, args,
        subpagetitle, pagemsg)
    if genders == None:
        return None

    new_params = []
    for param in noun_table_template.params:
        new_params.append((param.name, param.value))

    orig_headword_template = unicode(headword_template)
    params_to_preserve = runoun.fix_old_headword_params(headword_template,
        new_params, genders, pagemsg)
    if params_to_preserve == None:
        return None

    if unicode(headword_template.name) == "ru-proper noun":
        # If proper noun and n is both then we need to add n=both because
        # proper noun+ defaults to n=sg
        if args["n"] == "b" and not getparam(headword_template, "n"):
            pagemsg("Adding n=both to headword tempate")
            headword_template.add("n", "both")
        # Correspondingly, if n is sg then we can usually remove n=sg;
        # but we need to check that the number is actually sg with n=sg
        # removed because of the possibility of plurale tantum lemmas
        if args["n"] == "s":
            generate_template_with_ndef = generate_template.replace("}}", "|ndef=sg}}")
            generate_template_with_ndef = re.sub(r"\|n=s[^=|{}]*", "",
                generate_template_with_ndef)
            generate_result = expand_text(generate_template_with_ndef)
            if not generate_result:
                pagemsg("WARNING: Error generating noun args, skipping")
                return None
            ndef_args = ru.split_generate_args(generate_result)
            if ndef_args["n"] == "s":
                existing_n = getparam(headword_template, "n")
                if existing_n and not re.search(r"^s", existing_n):
                    pagemsg("WARNING: Something wrong: Found n=%s, not singular" %
                        existing_n)
                else:
                    pagemsg("Removing n=sg from headword tempate")
                    rmparam(headword_template, "n")
            else:
                pagemsg("WARNING: Unable to remove n= from headword template because n=%s" %
                    ndef_args["n"])

    # Re-attach the params that fix_old_headword_params asked to keep.
    headword_template.params.extend(params_to_preserve)
    ru_noun_changed = 0
    ru_proper_noun_changed = 0
    if unicode(headword_template.name) == "ru-noun":
        headword_template.name = "ru-noun+"
        ru_noun_changed = 1
    else:
        headword_template.name = "ru-proper noun+"
        ru_proper_noun_changed = 1

    pagemsg("Replacing headword %s with %s" % (orig_headword_template, unicode(headword_template)))

    return unicode(parsed), ru_noun_changed, ru_proper_noun_changed, bian_replaced, frobbed_manual_translit
"{{ru-noun+", proposed_template_text) proposed_decl = blib.parse_text("{{ru-noun-table}}").filter_templates()[0] for param in generate_template.params: proposed_decl.add(param.name, param.value) def pagemsg_with_proposed(text): pagemsg("Proposed new template (WARNING, omits explicit gender and params to preserve from old template): %s" % proposed_template_text) pagemsg(text) if headword_is_proper: generate_template.add("ndef", "sg") generate_result = expand_text(unicode(generate_template)) if not generate_result: pagemsg_with_proposed("WARNING: Error generating noun args, skipping") return genargs = ru.split_generate_args(generate_result) if headword_is_proper and genargs["n"] == "s" and not getparam(proposed_decl, "n"): proposed_decl.add("n", "sg") # This will check number mismatch (and animacy mismatch, but that shouldn't # occur as we've taken the animacy directly from the headword) new_genders = runoun.check_old_noun_headword_forms(headword_template, genargs, subpagetitle, pagemsg_with_proposed, laxer_comparison=True) if new_genders == None: return None orig_headword_template = unicode(headword_template) params_to_preserve = runoun.fix_old_headword_params(headword_template, params, new_genders, pagemsg_with_proposed) if params_to_preserve == None: return None
def process_page(index, page, save, verbose, fix_pages):
    """Check generated past passive participles and drop suspicious ones.

    For every ``ru-conj`` / ``ru-conj-old`` template (except multi-arg ones
    containing an ``or`` param), the verb forms are generated and each past
    passive participle is checked: its ending, its first letter against the
    page title, the expected ``-нный`` ending for the infinitive class, and
    finally the form predicted by ``form_ppp``.  When the page title is in
    ``fix_pages``, the offending values are removed from the template's
    ``past_pasv_part*`` / ``ppp*`` params and the remaining values are
    renumbered.

    Args:
        index: Page index, used only as a prefix in log messages.
        page: Page object providing ``title()`` and ``text``.
        save: Not consulted in the visible code.
        verbose: Passed through to ``blib.expand_text``.
        fix_pages: Container of page titles whose templates may be modified.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    # NOTE(review): `text` and `notes` are collected but nothing in the
    # visible code writes the page back — confirm whether a save step is
    # missing from this function.
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []

    # Loop-invariant: previously recomputed for every participle form.
    unstressed_page = rulib.make_unstressed(pagetitle)

    for t in parsed.filter_templates():
        tname = unicode(t.name)
        if tname not in ["ru-conj", "ru-conj-old"]:
            continue
        if [x for x in t.params if unicode(x.value) == "or"]:
            pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
            continue
        conjtype = getparam(t, "2")
        if tname == "ru-conj":
            tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                              unicode(t))
        else:
            tempcall = re.sub(r"\{\{ru-conj-old",
                              "{{ru-generate-verb-forms|old=y", unicode(t))
        result = expand_text(tempcall)
        if not result:
            pagemsg("WARNING: Error generating forms, skipping")
            continue
        args = rulib.split_generate_args(result)

        for base in ["past_pasv_part", "ppp"]:
            forms_to_remove = []
            # Guard the lookup: a slot absent from the generated forms used
            # to raise KeyError here.
            if base not in args or args[base] == "-":
                continue
            for form in re.split(",", args[base]):
                origform = form
                # Drop a trailing "//..." suffix before checking.
                form = re.sub("//.*", "", form)
                if not form:
                    # An empty entry has no first letter to compare and
                    # previously crashed with IndexError below.
                    continue
                fix_form = False
                # Plain u"" literal instead of ur"": the pattern contains no
                # backslashes, so the value is identical, and ur"" is a
                # syntax error on Python 3.
                if not re.search(u"([аяеё]́?нный|тый)$", form):
                    pagemsg("WARNING: Past passive participle doesn't end correctly: %s" % form)
                    fix_form = True
                unstressed_form = rulib.make_unstressed(form)
                warned = False
                if unstressed_form[0] != unstressed_page[0]:
                    pagemsg("WARNING: Past passive participle doesn't begin with same letter, probably for wrong aspect: %s" % form)
                    warned = True
                    fix_form = True
                if form.endswith(u"нный"):
                    if pagetitle.endswith(u"ать"):
                        good_ending = u"анный"
                    elif pagetitle.endswith(u"ять"):
                        good_ending = u"янный"
                    else:
                        good_ending = u"енный"
                    if not unstressed_form.endswith(good_ending):
                        pagemsg("WARNING: Past passive participle doesn't end right, probably for wrong aspect: %s" % form)
                        warned = True
                        fix_form = True
                if not warned:
                    # Only compare against the rule-based form when the
                    # cheaper checks did not already flag the participle.
                    correct_form = form_ppp(conjtype, pagetitle, args)
                    if correct_form and unstressed_form != correct_form:
                        pagemsg("WARNING: Past passive participle not formed according to rule, probably wrong: found %s, expected %s" %
                                (unstressed_form, correct_form))
                        fix_form = True
                if fix_form:
                    forms_to_remove.append(origform)

            if forms_to_remove and pagetitle in fix_pages:
                curvals = []
                for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
                    val = getparam(t, base + i)
                    if val:
                        curvals.append(val)
                newvals = [x for x in curvals if x not in forms_to_remove]
                if len(curvals) - len(newvals) != len(forms_to_remove):
                    pagemsg("WARNING: Something wrong, couldn't remove all PPP forms %s" % ",".join(forms_to_remove))
                # Renumber the surviving values from slot 1 and clear the
                # slots left over.
                curindex = 1
                origt = unicode(t)
                for newval in newvals:
                    t.add(base + ("" if curindex == 1 else str(curindex)),
                          newval)
                    curindex += 1
                for i in xrange(curindex, 10):
                    rmparam(t, base + ("" if i == 1 else str(i)))
                pagemsg("Replacing %s with %s" % (origt, unicode(t)))
                notes.append("removed bad past pasv part(s) %s"
                             % ",".join(forms_to_remove))
def process_page(index, page, direc, delete_bad, fix_verbs, save, verbose):
    """Disabled: convert ``3olda`` conjugations and delete stale form pages.

    The function currently logs a warning and returns immediately; all code
    after that early return is unreachable and kept for reference.  When it
    ran, it replaced param 1 of ``ru-conj`` templates of type ``3olda`` with
    ``direc`` (removing params 4-6), optionally deleted form pages for past
    forms that the new conjugation no longer generates, and saved the page
    when ``fix_verbs`` and ``save`` were set.

    Args:
        index: Page index, used only as a prefix in log messages.
        page: Page object providing ``title()``, ``text`` and ``save()``.
        direc: New value for param 1 (``3oa`` is rewritten to ``3°a``).
        delete_bad: Whether to look for and delete obsolete form pages.
        fix_verbs: Whether the modified page text may be written back.
        save: Whether deletions and saves are actually performed.
        verbose: Enables extra logging and is passed to ``blib.expand_text``.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    # Deliberate kill switch: nothing below this return executes.
    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    direc = direc.replace("3oa", u"3°a")
    for t in parsed.filter_templates():
        origt = unicode(t)
        if unicode(t.name) in ["ru-conj"]:
            conjtype = getparam(t, "1")
            if not conjtype.startswith("3olda"):
                continue
            # startswith("3olda") is already known to hold at this point.
            if conjtype != "3olda":
                pagemsg("WARNING: Found 3a-old with variant, can't process: %s" % unicode(t))
                continue

            # Forms generated by the template as it currently stands.
            tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                              unicode(t))
            result = expand_text(tempcall)
            if not result:
                pagemsg("WARNING: Error generating forms, skipping")
                continue
            oldargs = rulib.split_generate_args(result)

            rmparam(t, "6")
            rmparam(t, "5")
            rmparam(t, "4")
            t.add("1", direc)

            # Forms generated by the rewritten template.
            # NOTE(review): if this expansion fails, the template has
            # already been modified but no note is recorded, so the
            # `assert notes` below could fire — confirm before re-enabling.
            tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                              unicode(t))
            result = expand_text(tempcall)
            if not result:
                pagemsg("WARNING: Error generating forms, skipping")
                continue

            if delete_bad:
                newargs = rulib.split_generate_args(result)
                for form in ["past_m", "past_f", "past_n", "past_pl",
                             "past_m_short", "past_f_short", "past_n_short",
                             "past_pl_short"]:
                    oldforms = re.split(",", oldargs[form]) if form in oldargs else []
                    newforms = re.split(",", newargs[form]) if form in newargs else []
                    for oldform in oldforms:
                        if oldform in newforms:
                            continue
                        formpagename = rulib.remove_accents(oldform)
                        formpage = pywikibot.Page(site, formpagename)
                        if not formpage.exists():
                            pagemsg("WARNING: Form page %s doesn't exist, skipping" % formpagename)
                            continue
                        if formpagename == pagetitle:
                            pagemsg("WARNING: Attempt to delete dictionary form, skipping")
                            continue
                        # Use a separate name for the form page's text. The
                        # original reused `text`, clobbering the snapshot of
                        # this page's text that is compared with new_text
                        # further down.
                        formtext = unicode(formpage.text)
                        if "Etymology 1" in formtext:
                            pagemsg("WARNING: Found 'Etymology 1', skipping form %s" % formpagename)
                            continue
                        if "----" in formtext:
                            pagemsg("WARNING: Multiple languages apparently in form, skippin form %s" % formpagename)
                            continue
                        numinfls = len(re.findall(r"\{\{inflection of\|",
                                                  formtext))
                        if numinfls < 1:
                            pagemsg("WARNING: Something wrong, no 'inflection of' templates on page for form %s" % formpagename)
                        elif numinfls > 1:
                            pagemsg("WARNING: Multiple 'inflection of' templates on page for form %s, skipping" % formpagename)
                        else:
                            comment = "Delete erroneously created long form of %s" % pagetitle
                            pagemsg("Existing text for form %s: [[%s]]" % (
                                formpagename, formtext))
                            if save:
                                formpage.delete(comment)
                            else:
                                pagemsg("Would delete page %s with comment=%s" %
                                        (formpagename, comment))

            notes.append("fix 3olda -> %s" % direc)
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    new_text = unicode(parsed)

    if new_text != text and fix_verbs:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        assert notes
        comment = "; ".join(notes)
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)