def process_page(page, index, parsed):
    """Convert {{head|bnt-pro|verb}} templates to {{bnt-verb}}.

    Returns a tuple of (new page text, list of change notes). A matching
    template that carries any parameter other than 1= and 2= is logged and
    left completely untouched.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        if not (tname(t) == "head" and getparam(t, "1") == "bnt-pro"
                and getparam(t, "2") == "verb"):
            continue
        # Validate BEFORE mutating: previously 1= and 2= were removed first,
        # so a rejected template was left half-edited ({{head|foo=bar}}) in
        # the returned text.
        unrecognized = False
        for param in t.params:
            if unicode(param.name).strip() in ["1", "2"]:
                continue
            pagemsg("Saw unrecognized param %s=%s in %s" %
                    (unicode(param.name), unicode(param.value), origt))
            unrecognized = True
        if unrecognized:
            continue
        rmparam(t, "1")
        rmparam(t, "2")
        blib.set_template_name(t, "bnt-verb")
        notes.append("convert {{head|bnt-pro|verb}} to {{bnt-verb}}")
        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))
    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Rename {{timeline}} to {{en-timeline}} under an English citation header.

    The language is taken from 1= of the most recent {{citation}} or
    {{citations}} template seen before the {{timeline}}. Returns a tuple of
    (new page text, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    notes = []
    pagemsg("Processing")
    last_lang = None
    for t in parsed.filter_templates():
        tn = tname(t)
        if tn in ["citation", "citations"]:
            last_lang = getparam(t, "1")
        if tn != "timeline":
            continue
        if last_lang != "en":
            pagemsg(
                "WARNING: Skipped due to not being on English citations page (last_lang=%s): %s"
                % (last_lang, unicode(t)))
            continue
        blib.set_template_name(t, "en-timeline")
        notes.append("'timeline' -> 'en-timeline'")
    return unicode(parsed), notes
def replace_spenser_fq(m):
    """Regex-substitution callback folding a raw passage into {{RQ:Spenser FQ}}.

    `m` must have two groups: the template text and the passage text that
    follows it. Positional 2= and 1= are converted to canto= and book= via
    arabic_to_roman(); if that conversion yields nothing, the matched text
    is returned unchanged. Appends to `notes` from the enclosing scope.
    """
    template, passage = m.groups()
    t = list(blib.parse_text(template).filter_templates())[0]
    # Canto (2=) is handled before book (1=), same order as before.
    for positional, named in [("2", "canto"), ("1", "book")]:
        arabic = getparam(t, positional)
        if not arabic:
            continue
        roman = arabic_to_roman(arabic)
        if not roman:
            return m.group(0)
        t.add(named, roman, before=positional)
        rmparam(t, positional)
    passage = re.sub(r"\s*<br */?>\s*", " / ", passage)
    passage = re.sub(r"^\{\{quote\|en\|(.*)\}\}$", r"\1", passage)
    t.add("passage", passage)
    blib.set_template_name(t, "RQ:Spenser Faerie Queene")
    notes.append(
        "reformat {{RQ:Spenser FQ}} into {{RQ:Spenser Faerie Queene}}")
    return "%s\n" % unicode(t)
def process_page(page, index, parsed):
    """Switch Latin 2nd-declension templates with a stem in -i to the -ius/-ium variants.

    {{la-decl-2nd}} becomes {{la-decl-2nd-ius}} and {{la-decl-2nd-N}} becomes
    {{la-decl-2nd-N-ium}}, dropping the final "i" from 1=. Templates whose
    stem does not end in "i" only get a warning. Returns a tuple of
    (new page text, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    # old template name -> (new name, change note, warning format)
    conversions = {
        "la-decl-2nd": (
            "la-decl-2nd-ius",
            "Fix noun in -ius to use {{la-decl-2nd-ius}}",
            "WARNING: Found la-decl-2nd without stem in -i: %s",
        ),
        "la-decl-2nd-N": (
            "la-decl-2nd-N-ium",
            "Fix noun in -ium to use {{la-decl-2nd-N-ium}}",
            "WARNING: Found la-decl-2nd-N without stem in -i: %s",
        ),
    }

    pagemsg("Processing")
    notes = []
    for t in parsed.filter_templates():
        spec = conversions.get(tname(t))
        if spec is None:
            continue
        new_name, note, warning = spec
        stem = getparam(t, "1")
        if not stem.endswith("i"):
            pagemsg(warning % unicode(t))
            continue
        blib.set_template_name(t, new_name)
        t.add("1", stem[:-1])
        notes.append(note)
    return unicode(parsed), notes
def do_process_text_on_page(index, pagename, text, adj):
    """Convert old Bulgarian declension templates to {{bg-adecl}} / {{bg-ndecl}}.

    When `adj` is true, {{bg-decl-adj}} is converted using the headword from
    the preceding {{bg-adj}}; otherwise any template whose name starts with
    "bg-noun-" is converted using the headword from the preceding {{bg-noun}}
    or {{bg-proper noun}}.

    Returns a tuple of (new page text, list of change notes), or None when
    the page has numbered Etymology/Pronunciation sections.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")
    notes = []
    if "==Etymology 1==" in text or "==Pronunciation 1==" in text:
        pagemsg("WARNING: Saw Etymology/Pronunciation 1, can't handle yet")
        return
    parsed = blib.parse_text(text)
    headword_templates = ["bg-adj"] if adj else ["bg-noun", "bg-proper noun"]
    headword = None
    for t in parsed.filter_templates():
        tn = tname(t)
        if tn in headword_templates:
            headword = getparam(t, "1")
        is_decl = tn == "bg-decl-adj" if adj else tn.startswith("bg-noun-")
        if not is_decl:
            continue
        origt = unicode(t)
        if not headword:
            pagemsg("WARNING: Saw %s without {{%s}} headword" %
                    (origt, "bg-adj" if adj else "bg-noun"))
            continue
        del t.params[:]
        t.add("1", "%s<>" % headword)
        blib.set_template_name(t, "bg-adecl" if adj else "bg-ndecl")
        pagemsg("Replaced %s with %s" % (origt, unicode(t)))
        notes.append("convert {{%s}} to {{%s}}" % (tn, tname(t)))
    # Return the re-serialized parse tree; returning the input `text` (as
    # before) silently discarded every conversion made above.
    return unicode(parsed), notes
def process_text_on_page(index, pagename, text):
    """Replace {{head|la|POS}} with the POS-specific template from `pos_to_template`.

    The page name becomes 1=, 2= is removed and FIXME=1 is added. Templates
    with an unknown part of speech, or with 3= or head=, are logged and
    skipped. Returns a tuple of (new page text, list of change notes).
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")
    notes = []
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        if tname(t) != "head" or getparam(t, "1") != "la":
            continue
        pos = getparam(t, "2")
        if pos not in pos_to_template:
            pagemsg("WARNING: Saw unrecognized part of speech %s: %s" %
                    (pos, unicode(t)))
            continue
        if getparam(t, "3") or getparam(t, "head"):
            pagemsg("WARNING: Saw 3= or head=: %s" % unicode(t))
            continue
        before = unicode(t)
        t.add("1", pagename)
        blib.set_template_name(t, pos_to_template[pos])
        rmparam(t, "2")
        t.add("FIXME", "1")
        pagemsg("Replaced %s with %s" % (before, unicode(t)))
        notes.append("replace {{head|la|%s}} with {{%s}}" % (pos, tname(t)))
    return unicode(parsed), notes
def process_text_on_page_for_full_conj(index, pagename, text, verbs):
    """Add a conjugation spec from `verbs` to the Spanish verb headword on a page.

    `verbs` maps page names to conjugation entries. An entry equal to the
    default form ("FIRST<> [[w2]] [[w3]] ...") is blanked; an entry that
    differs from it only inside <...> has its links removed. {{es-verb}} is
    only touched when it has attn=, which is then removed; {{head|es|verb}}
    is converted to {{es-verb}}.

    Returns a tuple of (new page text, list of change notes), or None when
    the page has no entry in `verbs`.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")
    notes = []
    if pagename not in verbs:
        pagemsg("WARNING: Can't find entry, skipping")
        return

    origentry = verbs[pagename]
    entry = origentry
    # The page name must contain a space; the first word is the verb itself.
    first, rest = pagename.split(" ", 1)
    linked_rest = " ".join("[[%s]]" % word for word in rest.split(" "))
    def_link = "%s<> %s" % (first, linked_rest)
    if entry == def_link:
        pagemsg("Replacing entry '%s' with a blank entry because it's the default" % entry)
        entry = ""
    elif re.sub("<.*?>", "<>", entry) == def_link:
        unlinked = blib.remove_links(entry)
        pagemsg("Replacing entry '%s' with entry without links '%s'" % (entry, unlinked))
        entry = unlinked

    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        origt = unicode(t)
        if tn == "es-verb":
            if not getparam(t, "attn"):
                pagemsg("Didn't see attn=1: %s" % unicode(t))
                continue
            rmparam(t, "attn")
            if entry:
                t.add("1", entry)
                notes.append("add conjugation '%s' to Spanish verb" % entry)
            else:
                notes.append("add conjugation (default) to Spanish verb")
        elif tn == "head" and getparam(t, "1") == "es" and getparam(t, "2") == "verb":
            head = getparam(t, "head")
            if head:
                pagemsg("WARNING: Removing head=%s compared with entry '%s', original entry '%s': %s" %
                        (head, entry, origentry, unicode(t)))
            for doomed in ["head", "2", "1"]:
                rmparam(t, doomed)
            blib.set_template_name(t, "es-verb")
            if entry:
                t.add("1", entry)
                notes.append("convert {{head|es|verb}} to {{es-verb|%s}}" % entry)
            else:
                notes.append("convert {{head|es|verb}} to {{es-verb}}")
        if origt != unicode(t):
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
    return unicode(parsed), notes
def hack_templates(parsed, langname, langnamecode=None, is_citation=False):
    """Add an inferred language code to templates in `parsed`; rename some templates.

    Reads `pagemsg`, `notes`, `langparam`, `templates_to_process` and
    `templates_to_rename` from an enclosing/global scope. Returns the
    language code in effect after the last template was processed.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the original file.
    """
    if langname not in blib.languages_byCanonicalName:
        # Unknown language name: outside citation pages forget any code that
        # was passed in; on citation pages keep it.
        if not is_citation:
            langnamecode = None
    else:
        langnamecode = blib.languages_byCanonicalName[langname]["code"]
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        # On citation pages the {{citation(s)}} template sets the language
        # for the templates that follow it.
        if tn in ["citation", "citations"] and is_citation:
            langnamecode = getparam(t, "lang") or getparam(t, "1")
        if tn in templates_to_process:
            if getparam(t, langparam):
                # Language already specified; nothing to infer.
                pass
            elif not langnamecode:
                pagemsg(
                    "WARNING: Unrecognized language %s, unable to add language to %s"
                    % (langname, origt))
            else:
                notes.append(
                    "infer %s=%s for {{%s}} based on section it's in" %
                    (langparam, langnamecode, tn))
                # Keep one-param-per-line layout if the template name itself
                # ends a line.
                newline = "\n" if "\n" in unicode(t.name) else ""
                if langparam == "1":
                    if t.has("lang"):
                        pagemsg(
                            "WARNING: Template has lang=, removing: %s" % origt)
                        notes.append("remove lang= from {{%s}}" % tn)
                        rmparam(t, "lang")
                    t.add(langparam, langnamecode + newline,
                          preserve_spacing=False)
                else:
                    # Named language param: rebuild the template so that the
                    # language param comes first.
                    # Fetch all params.
                    params = []
                    for param in t.params:
                        pname = unicode(param.name)
                        params.append((pname, param.value, param.showkey))
                    # Erase all params.
                    del t.params[:]
                    t.add(langparam, langnamecode + newline,
                          preserve_spacing=False)
                    # Put remaining parameters in order.
                    for name, value, showkey in params:
                        t.add(name, value, showkey=showkey,
                              preserve_spacing=False)
        if tn in templates_to_rename:
            blib.set_template_name(t, templates_to_rename[tn])
            notes.append("rename {{%s}} to {{%s}}" %
                         (tn, templates_to_rename[tn]))
        newt = unicode(t)
        if newt != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, newt))
    return langnamecode
def process_text_on_page(index, pagetitle, text):
    """Convert {{Wikisource1911Enc Citation|X}} to {{projectlink|1911|X}}.

    Returns a tuple of (new page text, list of change notes).
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    notes = []
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        if tname(t) != "Wikisource1911Enc Citation":
            continue
        before = unicode(t)
        # The old first positional argument shifts to 2=; 1= becomes "1911".
        old_first = getparam(t, "1")
        t.add("1", "1911")
        t.add("2", old_first)
        blib.set_template_name(t, "projectlink")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))
            notes.append(
                "convert {{Wikisource1911Enc Citation}} to {{projectlink|1911}}"
            )
    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Replace the form-of templates listed in `templates` with {{inflection of}}.

    `templates` maps an old template name to the list of inflection tags
    that become 4=, 5=, ... of {{inflection of}}. The language is taken from
    lang= if present (term/alt/gloss are then 1=/2=/3=), otherwise from 1=
    (term/alt/gloss are 2=/3=/4=). A non-empty gloss becomes t=.

    The page text is re-parsed from `page.text`; the `parsed` argument is
    not used. Returns a tuple of (new page text, list of change notes), or
    (None, None) when the page should be ignored or a template has an
    unrecognized parameter.
    """
    pagetitle = unicode(page.title())
    text = unicode(page.text)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    if blib.page_should_be_ignored(pagetitle):
        pagemsg("WARNING: Page should be ignored")
        return None, None
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn not in templates:
            continue
        infl_params = templates[tn]
        lang = getparam(t, "lang")
        if lang:
            has_lang = True
            term = getparam(t, "1")
            alt = getparam(t, "2")
            gloss = getparam(t, "3")
        else:
            has_lang = False
            lang = getparam(t, "1")
            term = getparam(t, "2")
            alt = getparam(t, "3")
            gloss = getparam(t, "4")
        for param in t.params:
            name = unicode(param.name).strip()
            if name in ["lang", "1", "2", "3"] or (name == "4" and not has_lang):
                continue
            pagemsg("WARNING: Unrecognized param %s, skipping" % name)
            return None, None
        # Erase all params.
        del t.params[:]
        # Put back new params.
        blib.set_template_name(t, "inflection of")
        t.add("1", lang)
        t.add("2", term)
        t.add("3", alt)
        # Must not be called `index`: pagemsg() reads the page index from
        # this scope, and reusing the name made later log lines report the
        # tag position instead of the page index.
        for tag_offset, tag in enumerate(infl_params):
            t.add(str(tag_offset + 4), tag)
        if gloss:
            t.add("t", gloss)
        if origt != unicode(t):
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
            notes.append("replace {{%s}} with {{inflection of}}" % tn)
    return unicode(parsed), notes
def process_page(index, page, template, new_name, params_to_add,
                 params_to_remove, params_to_rename, filters, comment):
    """Apply generic parameter edits (and optionally a rename) to one template.

    Arguments:
      template: name of the template to modify.
      new_name: new template name, or a false value to keep the name.
      params_to_add: iterable of (param, value) pairs to set.
      params_to_remove: iterable of parameter names to remove.
      params_to_rename: iterable of (old_param, new_param) pairs.
      filters: iterable of "PARAM=VALUE" (exact match) or "PARAM~REGEX"
        (regex search) strings; a template is only modified when every
        filter matches.
      comment: if true, returned in place of the generated notes.

    Returns a tuple of (new page text, `comment` or list of change notes).
    Raises ValueError for a filter that is in neither form.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def passes_filters(t, origt):
        # True if the template satisfies every filter; logs the first failure.
        for filt in filters:
            m = re.search("^(.*)=(.*)$", filt)
            if m:
                matched = getparam(t, m.group(1)) == m.group(2)
            else:
                m = re.search("^(.*)~(.*)$", filt)
                if not m:
                    raise ValueError("Unrecognized filter %s" % filt)
                matched = bool(re.search(m.group(2), getparam(t, m.group(1))))
            if not matched:
                # Both values must be passed as a tuple; the old code
                # formatted with `origt` alone and raised TypeError.
                pagemsg("Skipping %s because filter %s doesn't match" %
                        (origt, filt))
                return False
        return True

    pagemsg("Processing")
    notes = []
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn != template:
            continue
        # Skip the whole template on a failed filter. The old `continue` sat
        # inside the filter loop and only advanced to the next filter.
        if not passes_filters(t, origt):
            continue
        for old_param, new_param in params_to_rename:
            if t.has(old_param):
                t.add(new_param, getparam(t, old_param), before=old_param,
                      preserve_spacing=False)
                rmparam(t, old_param)
                notes.append("rename %s= to %s= in {{%s}}" %
                             (old_param, new_param, tn))
        for param in params_to_remove:
            if t.has(param):
                rmparam(t, param)
                notes.append("remove %s= from {{%s}}" % (param, tn))
        for param, value in params_to_add:
            if getparam(t, param) != value:
                t.add(param, value)
                notes.append("add %s=%s to {{%s}}" % (param, value, tn))
        if new_name:
            blib.set_template_name(t, new_name)
            notes.append("rename {{%s}} to {{%s}}" % (template, new_name))
        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))
    return unicode(parsed), comment or notes
def process_page(page, index, parsed):
    """Replace {{head|it|noun|invariable}} with {{it-noun|GENDER|-}}.

    Only g=, g2= and head= are accepted besides 1=/2=/3=; a template with
    any other parameter, or without g=, is logged and skipped. Returns a
    tuple of (new page text, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    for t in parsed.filter_templates():
        is_invariable_noun = (
            tname(t) == "head" and getparam(t, "1") == "it"
            and getparam(t, "2") in ["noun", "nouns"]
            and getparam(t, "3") == "invariable")
        if not is_invariable_noun:
            continue

        carried = {"g": None, "g2": None, "head": None}
        for param in t.params:
            name = unicode(param.name).strip()
            if name in ["1", "2", "3"]:
                continue
            if name not in carried:
                pagemsg("WARNING: Saw unrecognized param %s: %s" %
                        (name, unicode(t)))
                break
            carried[name] = unicode(param.value).strip()
        else:  # no break: every param was recognized
            if not carried["g"]:
                pagemsg("WARNING: Didn't see gender: %s" % unicode(t))
                continue
            before = unicode(t)
            del t.params[:]
            blib.set_template_name(t, "it-noun")
            if carried["head"]:
                t.add("head", carried["head"])
            t.add("1", carried["g"])
            if carried["g2"]:
                t.add("g2", carried["g2"])
            t.add("2", "-")
            notes.append(
                "replace {{head|it|noun|...|invariable}} with {{it-noun|...|-}}"
            )
            after = unicode(t)
            if before != after:
                pagemsg("Replaced %s with %s" % (before, after))
    return unicode(parsed), notes
def convert_template_to_new(t, pagetitle, pagemsg, errandpagemsg):
    """Rewrite an old {{la-decl-*}} noun template in place as {{la-ndecl}}.

    The suffix after "la-decl-" is looked up in
    `la_noun_decl_suffix_to_decltype`. The lemma, second stem and subtypes
    come from compute_noun_lemma_and_subtypes(), and the flags loc=, lig=,
    um=/genplum= and n= become subtypes. Other parameters (apart from 1=,
    2= and noun=) are carried over after the new 1=.

    Returns `t` when compare_new_and_old_templates() accepts the result,
    otherwise None. None is also returned, before any change, when the
    template name is not recognized or not convertible.
    """
    origt = unicode(t)
    tn = tname(t)
    m = re.search(r"^la-decl-(.*)$", tn)
    if not m:
        pagemsg("WARNING: Something wrong, can't parse noun decl template name: %s" % tn)
        return None
    decl_suffix = m.group(1)
    if decl_suffix not in la_noun_decl_suffix_to_decltype:
        pagemsg("WARNING: Unrecognized noun decl template name: %s" % tn)
        return None
    decltype = la_noun_decl_suffix_to_decltype[decl_suffix]
    if decltype is None:
        pagemsg("WARNING: Unable to convert template: %s" % unicode(t))
        return None
    declspec, stem_suffix, pl_suffix, to_auto = decltype
    if type(declspec) is tuple:
        declspec = declspec[0]

    stem1 = getparam(t, "1").strip()
    stem2 = getparam(t, "2").strip()
    num = getrmparam(t, "num")
    lemma, stem2, subtypes = compute_noun_lemma_and_subtypes(
        declspec, stem1, stem2, num, stem_suffix, pl_suffix, to_auto,
        pagemsg, origt)

    # Each flag is removed from the template as it is read, so it is not
    # carried over below.
    if bool_param_is_true(getrmparam(t, "loc")):
        subtypes.append("loc")
    if bool_param_is_true(getrmparam(t, "lig")):
        subtypes.append("lig")
    um = getrmparam(t, "um")
    genplum = getrmparam(t, "genplum")
    if bool_param_is_true(um) or bool_param_is_true(genplum):
        subtypes.append("genplum")
    if bool_param_is_true(getrmparam(t, "n")):
        subtypes.append("sufn")

    blib.set_template_name(t, "la-ndecl")

    # Remember the params to carry over, then start from an empty template.
    carried = [
        (unicode(param.name), param.value, param.showkey)
        for param in t.params
        if unicode(param.name).strip() not in ["1", "2", "noun"]
    ]
    del t.params[:]

    if stem2:
        lemma += "/" + stem2
    lemma += "<%s>" % ".".join([declspec] + subtypes)
    t.add("1", lemma)
    for name, value, showkey in carried:
        t.add(name, value, showkey=showkey, preserve_spacing=False)

    pagemsg("Replaced %s with %s" % (origt, unicode(t)))
    if compare_new_and_old_templates(origt, unicode(t), pagetitle, pagemsg,
                                     errandpagemsg):
        return t
    return None
def process_text_on_page(index, pagetitle, text):
    """Convert {{vi-hantu}} on single-character pages to {{vi-readings}}.

    The link-stripped 1= becomes nom= when chu=Nom, otherwise reading=.
    Templates are skipped with a warning when the title is not one
    character, pos= is present, chu= is something other than "Nom", the
    reading is empty, or a parameter other than 1=, rs= and chu= is present.
    Returns a tuple of (new page text, list of change notes).
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    notes = []
    pagemsg("Processing")
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        origt = unicode(t)
        if tname(t) == "vi-hantu":
            if not one_char(pagetitle):
                pagemsg("WARNING: Length of page title is %s > 1, skipping" %
                        len(pagetitle))
                continue
            if getparam(t, "pos"):
                pagemsg("WARNING: Saw pos=, skipping: %s" % unicode(t))
                continue
            chu = getparam(t, "chu")
            if chu and chu != "Nom":
                pagemsg("WARNING: Saw chu=%s not 'Nom', skipping: %s" %
                        (chu, unicode(t)))
                continue
            newparam = "nom" if chu == "Nom" else "reading"
            reading = blib.remove_links(getparam(t, "1"))
            if not reading:
                pagemsg("WARNING: Empty reading, skipping: %s" % unicode(t))
                continue
            for param in t.params:
                pn = pname(param)
                if pn not in ["1", "rs", "chu"]:
                    pagemsg(
                        "WARNING: Unrecognized parameter %s=%s, skipping: %s"
                        % (pn, unicode(param.value), unicode(t)))
                    break
            else:  # no break: all params recognized
                t.add(newparam, reading, before="1")
                rmparam(t, "1")
                blib.set_template_name(t, "vi-readings")
                notes.append("{{vi-hantu}} -> {{vi-readings}}")
        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))
    return unicode(parsed), notes
def convert_template_to_new(t, pagetitle, pagemsg, errandpagemsg):
    """Rewrite an old {{la-*}} adjective declension template in place as {{la-adecl}}.

    The suffix after "la-" is looked up in `la_adj_decl_suffix_to_decltype`,
    which supplies the declension and a function that computes the lemma,
    second stem, declension and subtypes. type= is split on "-" into the
    requested subtypes; num=sg adds the "sg" subtype. Other parameters
    (apart from 1=, 2= and noun=) are carried over after the new 1=.

    Returns `t` when compare_new_and_old_templates() accepts the result,
    otherwise None. None is also returned, before any change, when the
    template name is not recognized.
    """
    origt = unicode(t)
    tn = tname(t)
    m = re.search(r"^la-(.*)$", tn)
    if not m:
        pagemsg(
            "WARNING: Something wrong, can't parse adj decl template name: %s"
            % tn)
        return None
    decl_suffix = m.group(1)
    if decl_suffix not in la_adj_decl_suffix_to_decltype:
        pagemsg("WARNING: Unrecognized adj decl template name: %s" % tn)
        return None
    decl, compute_props = la_adj_decl_suffix_to_decltype[decl_suffix]

    stem1 = getparam(t, "1").strip()
    stem2 = getparam(t, "2").strip()
    num = getrmparam(t, "num")
    type_spec = getrmparam(t, "type")
    specified_subtypes = type_spec.split("-") if type_spec else []
    lemma, stem2, decl, subtypes = compute_props(
        stem1, stem2, decl, specified_subtypes, num, None, True, pagetitle,
        pagemsg)
    if num == "sg":
        subtypes.append("sg")
    decl += "+"

    blib.set_template_name(t, "la-adecl")

    # Remember the params to carry over, then start from an empty template.
    carried = [
        (unicode(param.name), param.value, param.showkey)
        for param in t.params
        if unicode(param.name).strip() not in ["1", "2", "noun"]
    ]
    del t.params[:]

    if stem2:
        lemma += "/" + stem2
    spec_parts = [decl] + subtypes
    # A bare "+" spec is not written out at all.
    if spec_parts != ["+"]:
        lemma += "<%s>" % ".".join(spec_parts)
    t.add("1", lemma)
    for name, value, showkey in carried:
        t.add(name, value, showkey=showkey, preserve_spacing=False)

    pagemsg("Replaced %s with %s" % (origt, unicode(t)))
    if compare_new_and_old_templates(origt, unicode(t), pagetitle, pagemsg,
                                     errandpagemsg):
        return t
    return None
def replace_bacon_the_advancement_of_learning(m):
    """Regex-substitution callback: fold a raw passage into {{RQ:Bacon Learning}}.

    `m` must have two groups: the template text and the passage text that
    follows it. <br> tags become " / " and one pair of enclosing '' italics
    is stripped before the text is stored in passage=. Appends to `notes`
    from the enclosing scope and returns the new template plus a newline.
    """
    template, passage = m.groups()
    t = list(blib.parse_text(template).filter_templates())[0]
    passage = re.sub(r"\s*<br */?>\s*", " / ", passage)
    passage = re.sub(r"^''(.*)''$", r"\1", passage)
    t.add("passage", passage)
    blib.set_template_name(t, "RQ:Bacon Learning")
    notes.append(
        "reformat {{RQ:Bacon The Advancement of Learning}} into {{RQ:Bacon Learning}}"
    )
    return "%s\n" % unicode(t)
def replace_chapman_odyssey(m):
    """Regex-substitution callback: fold a raw passage into {{RQ:Homer Chapman Odysseys}}.

    `m` must have two groups: the template text and the passage text that
    follows it. The match is returned unchanged unless the template is
    exactly {{RQ:Chapman Odyssey}}. Appends to `notes` from the enclosing
    scope.
    """
    template, passage = m.groups()
    templates_found = list(blib.parse_text(template).filter_templates())
    t = templates_found[0]
    if tname(t) != "RQ:Chapman Odyssey":
        return m.group(0)
    # Line breaks become " / "; one pair of enclosing italics is dropped.
    passage = re.sub(r"\s*<br */?>\s*", " / ", passage)
    passage = re.sub(r"^''(.*)''$", r"\1", passage)
    t.add("passage", passage)
    blib.set_template_name(t, "RQ:Homer Chapman Odysseys")
    notes.append("reformat {{RQ:Chapman Odyssey}} into {{RQ:Homer Chapman Odysseys}}")
    return "%s\n" % unicode(t)
def replace_lestrange_fables(m):
    """Regex-substitution callback: fold a raw passage into {{RQ:L'Estrange Fables of Aesop}}.

    `m` must have two groups: the template text and the passage text that
    follows it. The match is returned unchanged unless the template is
    exactly {{RQ:L'Estrange Fables}}. Appends to `notes` from the enclosing
    scope.
    """
    template, passage = m.groups()
    t = list(blib.parse_text(template).filter_templates())[0]
    if tname(t) != "RQ:L'Estrange Fables":
        return m.group(0)
    # Apply the two clean-ups in order: <br> -> " / ", then strip italics.
    for pattern, replacement in [(r"\s*<br */?>\s*", " / "),
                                 (r"^''(.*)''$", r"\1")]:
        passage = re.sub(pattern, replacement, passage)
    t.add("passage", passage)
    blib.set_template_name(t, "RQ:L'Estrange Fables of Aesop")
    notes.append("reformat {{RQ:L'Estrange Fables}} into {{RQ:L'Estrange Fables of Aesop}}")
    return unicode(t) + "\n"
def replace_browne_errors(m):
    """Regex-substitution callback: fold a raw passage into {{RQ:Browne Pseudodoxia Epidemica}}.

    `m` must have two groups: the template text and the passage text that
    follows it. The match is returned unchanged unless the template is
    exactly {{RQ:Browne Errors}}. Appends to `notes` from the enclosing
    scope.
    """
    template, raw_passage = m.groups()
    t = list(blib.parse_text(template).filter_templates())[0]
    if tname(t) != "RQ:Browne Errors":
        return m.group(0)
    with_slashes = re.sub(r"\s*<br */?>\s*", " / ", raw_passage)
    passage = re.sub(r"^''(.*)''$", r"\1", with_slashes)
    t.add("passage", passage)
    blib.set_template_name(t, "RQ:Browne Pseudodoxia Epidemica")
    notes.append("reformat {{RQ:Browne Errors}} into {{RQ:Browne Pseudodoxia Epidemica}}")
    return "%s\n" % unicode(t)
def process_page(page, index, parsed):
    """Normalize Old English adjective headwords to the {{ang-adj}} format.

    {{head|ang|adjective(s)}} with no parameters besides 1=, 2= and head=
    is renamed to {{ang-adj}}. Existing {{ang-adj}} templates have their
    positional parameters shifted (1= to 2=, 4= to 3=).

    The page is re-parsed from `page.text`; the `parsed` argument is not
    used. Returns a tuple of (parsed wikicode object, list of change
    notes); note that the first element is not converted to a string here.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; in particular confirm that the 4= handling belongs at the
    {{ang-adj}} branch level rather than inside the preceding `else`.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    text = unicode(page.text)
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        origt = unicode(t)
        if tn == "head" and getparam(t, "1") == "ang" and getparam(
                t, "2") in ["adjective", "adjectives"]:
            for param in t.params:
                pn = pname(param)
                if pn not in ["1", "2", "head"]:
                    pagemsg(
                        "WARNING: head|ang|adjective with extra params: %s"
                        % unicode(t))
                    break
            else:  # no break
                blib.set_template_name(t, "ang-adj")
                rmparam(t, "1")
                rmparam(t, "2")
                notes.append("convert {{head|ang|adjective}} into {{ang-adj}}")
        elif tn == "ang-adj":
            if getparam(t, "2"):
                # 2= already present: blank out 1=.
                t.add("1", "")
                notes.append("remove unneeded 1= from {{ang-adj}}")
            else:
                param1 = getparam(t, "1")
                if param1:
                    # Shift the value of 1= into 2=, leaving 1= blank.
                    t.add("1", "")
                    t.add("2", param1)
                    notes.append("move 1= to 2= in {{ang-adj}}")
            param4 = getparam(t, "4")
            if param4:
                rmparam(t, "4")
                # Make sure 1= and 2= exist (possibly blank) before adding 3=.
                if not getparam(t, "1"):
                    t.add("1", "")
                if not getparam(t, "2"):
                    t.add("2", "")
                t.add("3", param4)
                notes.append("move 4= to 3= in {{ang-adj}}")
        if unicode(t) != origt:
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
    return parsed, notes
def process_page(page, index, parsed):
    """Migrate {{#invoke:form of}} calls to {{#invoke:form of/templates}}.

    - template_tags (already on form of/templates) becomes tagged_form_of_t.
    - form_of_t / alt_form_of_t move to the form of/templates module, with
      any ignorelist= chain folded into ignore= as "NAME:list" entries.
    - alt_form_of_t additionally becomes form_of_t: text= moves to 2=, and
      nocap=/nodot= are replaced by their inverses withcap=/withdot=.

    Returns a tuple of (new page text, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        # Decided on the original name, which is changed further down.
        is_old_module = tn == "#invoke:form of"

        if tn == "#invoke:form of/templates" and getparam(
                t, "1") == "template_tags":
            t.add("1", "tagged_form_of_t")
            notes.append(
                "Rewrite {{#invoke:form of/templates|template_tags}} with {{#invoke:form of/templates|tagged_form_of_t}}"
            )

        if is_old_module and getparam(
                t, "1") in ["form_of_t", "alt_form_of_t"]:
            ignorelist = blib.fetch_param_chain(t, "ignorelist", "ignorelist")
            if ignorelist:
                ignore = blib.fetch_param_chain(t, "ignore", "ignore")
                ignore.extend(il + ":list" for il in ignorelist)
                blib.set_param_chain(t, ignore, "ignore", "ignore",
                                     before="ignorelist")
                blib.remove_param_chain(t, "ignorelist", "ignorelist")
            blib.set_template_name(t, "#invoke:form of/templates")
            notes.append(
                "Rewrite {{#invoke:form of|%s}} with {{#invoke:form of/templates|form_of_t}}"
                % getparam(t, "1"))

        if is_old_module and getparam(t, "1") == "alt_form_of_t":
            t.add("2", getparam(t, "text"), before="text")
            rmparam(t, "text")
            for negative, positive in [("nocap", "withcap"),
                                       ("nodot", "withdot")]:
                if t.has(negative):
                    rmparam(t, negative)
                else:
                    t.add(positive, "1")
            t.add("1", "form_of_t")

        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))
    return unicode(parsed), notes
def replace_bacon_ss(m):
    """Regex-substitution callback: fold a raw passage into {{RQ:Bacon Sylva Sylvarum}}.

    `m` must have two groups: the template text and the passage text that
    follows it. The match is returned unchanged unless the template is
    exactly {{RQ:Bacon SS}}. Appends to `notes` from the enclosing scope.
    """
    template, passage = m.groups()
    first_template = list(blib.parse_text(template).filter_templates())[0]
    if tname(first_template) != "RQ:Bacon SS":
        return m.group(0)
    # <br> separators become " / "; one pair of wrapping italics is removed.
    passage = re.sub(r"^''(.*)''$", r"\1",
                     re.sub(r"\s*<br */?>\s*", " / ", passage))
    first_template.add("passage", passage)
    blib.set_template_name(first_template, "RQ:Bacon Sylva Sylvarum")
    notes.append(
        "reformat {{RQ:Bacon SS}} into {{RQ:Bacon Sylva Sylvarum}}")
    return unicode(first_template) + "\n"
def process_text_on_page(index, pagename, text):
    """Rename quotation templates, folding a following raw passage line into passage=.

    For each (old, new) pair in `templates_to_rename`, a template followed
    by a "#*: ..." line is renamed and receives that line's text as
    passage=. Templates that already have passage= or text= are left alone
    in that pass. Afterwards every template whose name is a key of
    `templates_to_rename_dict` is renamed.

    Returns a tuple of (new page text, list of change notes).
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")
    notes = []
    # A trailing newline lets the pattern match a passage on the last line;
    # it is stripped again on return.
    curtext = text + "\n"

    for fromtemp, totemp in templates_to_rename:
        def reformat_template(m):
            template, passage = m.groups()
            t = list(blib.parse_text(template).filter_templates())[0]
            if tname(t) != fromtemp:
                return m.group(0)
            for existing_param in ["passage", "text"]:
                if getparam(t, existing_param):
                    pagemsg(
                        "WARNING: Can't incorporate raw passage text into {{%s}} because already has %s=: %s"
                        % (fromtemp, existing_param, unicode(t)))
                    return m.group(0)
            passage = re.sub(r"\s*<br */?>\s*", " / ", passage)
            passage = re.sub(r"^''(.*)''$", r"\1", passage)
            t.add("passage", passage)
            blib.set_template_name(t, totemp)
            notes.append(
                "reformat {{%s}} into {{%s}}, incorporating following raw passage text into passage="
                % (fromtemp, totemp))
            return "%s\n" % unicode(t)

        pattern = r"(\{\{%s.*?\}\})\n#+\*:\s*(.*?)\n" % re.escape(fromtemp)
        curtext = re.sub(pattern, reformat_template, curtext)

    parsed = blib.parse_text(curtext)
    for t in parsed.filter_templates():
        old_name = tname(t)
        if old_name in templates_to_rename_dict:
            new_name = templates_to_rename_dict[old_name]
            blib.set_template_name(t, new_name)
            notes.append("rename {{%s}} to {{%s}}" % (old_name, new_name))
    return unicode(parsed).rstrip("\n"), notes
def process_page(page, index, parsed):
    """Normalize Old English verb headwords to {{ang-verb|HEAD}}.

    {{head|ang|verb(s)}} with no parameters besides 1=, 2= and head= is
    renamed to {{ang-verb}}, with head= moved to 1=. In existing
    {{ang-verb}} templates head= is moved to 1=, and head2=/head3= are
    re-added after it.

    The page is re-parsed from `page.text`; the `parsed` argument is not
    used. Returns a tuple of (parsed wikicode object, list of change
    notes); note that the first element is not converted to a string here.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    text = unicode(page.text)
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        origt = unicode(t)
        if tn == "head" and getparam(t, "1") == "ang" and getparam(
                t, "2") in ["verb", "verbs"]:
            for param in t.params:
                pn = pname(param)
                if pn not in ["1", "2", "head"]:
                    pagemsg("WARNING: head|ang|verb with extra params: %s" %
                            unicode(t))
                    break
            else:  # no break
                blib.set_template_name(t, "ang-verb")
                rmparam(t, "1")
                rmparam(t, "2")
                notes.append("convert {{head|ang|verb}} into {{ang-verb}}")
                head = getparam(t, "head")
                if head:
                    t.add("1", head)
                rmparam(t, "head")
        elif tn == "ang-verb":
            head = getparam(t, "head")
            head2 = getparam(t, "head2")
            head3 = getparam(t, "head3")
            rmparam(t, "head")
            rmparam(t, "head2")
            rmparam(t, "head3")
            if head:
                t.add("1", head)
                # Record the note only when a head= was actually moved;
                # previously it was added for every {{ang-verb}}.
                notes.append("move head= to 1= in {{ang-verb}}")
            if head2:
                t.add("head2", head2)
            if head3:
                t.add("head3", head3)
        if unicode(t) != origt:
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
    return parsed, notes
def replace_clarendon_rebellion(m):
    """Regex-substitution callback: fold a raw passage into {{RQ:Clarendon History}}.

    `m` must have two groups: the template text and the passage text that
    follows it. The match is returned unchanged unless the template is
    exactly {{RQ:Clarendon Rebellion}}. Appends to `notes` from the
    enclosing scope.
    """
    template, passage = m.groups()
    t = list(blib.parse_text(template).filter_templates())[0]
    if tname(t) == "RQ:Clarendon Rebellion":
        passage = re.sub(r"\s*<br */?>\s*", " / ", passage)
        passage = re.sub(r"^''(.*)''$", r"\1", passage)
        t.add("passage", passage)
        blib.set_template_name(t, "RQ:Clarendon History")
        notes.append(
            "reformat {{RQ:Clarendon Rebellion}} into {{RQ:Clarendon History}}"
        )
        return unicode(t) + "\n"
    # Some other template matched the pattern: leave the text untouched.
    return m.group(0)
def put_back_new_inflection_of_params(t, notes, tags, params, lang, term, tr,
                                      alt,
                                      convert_to_more_specific_template=False):
    """Rebuild the parameters of an inflection-of style template from scratch.

    All existing params are erased, then 1=lang, 2=term and (if non-empty)
    tr= are added. If `convert_to_more_specific_template` is set, the
    template is one of `generic_inflection_of_templates` and the exact tag
    sequence is a key of `tags_to_templates`, the template is renamed to
    that specific template (e.g. {{plural of}}) with no tags, and a note is
    appended to `notes`. Otherwise 3=alt and the tags as 4=, 5=, ... are
    added. `params` is a list of (name, value, showkey) triples that are
    appended last in either case.
    """
    # Erase all params.
    del t.params[:]
    # Put back new params.
    # Strip comment continuations and line breaks. Such cases generally
    # have linebreaks after semicolons as well, but we remove those.
    # (FIXME, consider preserving them.)
    t.add("1", remove_comment_continuations(lang))
    t.add("2", remove_comment_continuations(term))
    tr = remove_comment_continuations(tr)
    if tr:
        t.add("tr", tr)

    tag_key = tuple(tags)
    use_specific = (convert_to_more_specific_template
                    and tname(t) in generic_inflection_of_templates
                    and tag_key in tags_to_templates)
    if use_specific:
        tempname = tags_to_templates[tag_key]
        old_tn = tname(t)
        # Convert to more specific template, e.g. {{plural of}}.
        blib.set_template_name(t, tempname)
        altparam = remove_comment_continuations(alt)
        if altparam:
            t.add("3", altparam)
        notes.append(
            "replace {{%s|%s|%s|...|%s}} with {{%s|%s|%s}}" %
            (old_tn, lang, term, "|".join(tags), tempname, lang, term))
    else:
        t.add("3", remove_comment_continuations(alt))
        # Put back the tags into the template, numbered from 4 upward.
        for offset, tag in enumerate(tags):
            t.add(str(4 + offset), tag)

    # Finally, put back misc. params.
    for name, value, showkey in params:
        t.add(name, value, showkey=showkey, preserve_spacing=False)
def process_page(page, index, parsed):
    """Rename Bulgarian form-of templates according to `templates_to_rename`.

    Each value in `templates_to_rename` is a sequence: the new template
    name followed by the positional parameters to insert. The old 1=
    becomes adj= when the new name is "bg-adj form of", otherwise noun=.
    lang= and sc= are dropped; a stray 2=/3=/4= is reported via errandmsg()
    but still carried over. Returns a tuple of (new page text, list of
    change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn in templates_to_rename:
            spec = templates_to_rename[tn]
            new_name = spec[0]
            new_params = spec[1:]
            if new_name == "bg-adj form of":
                main_entry_param = "adj"
            else:
                main_entry_param = "noun"
            blib.set_template_name(t, new_name)

            # Fetch all params.
            old_1 = getparam(t, "1")
            carried = []
            for param in t.params:
                rawname = unicode(param.name)
                stripped = rawname.strip()
                if stripped in ["1", "lang", "sc"]:
                    continue
                if stripped in ["2", "3", "4"]:
                    errandmsg("WARNING: Found %s= in %s" % (stripped, origt))
                carried.append((rawname, param.value, param.showkey))
            # Erase all params.
            del t.params[:]

            # Put back basic params.
            for position, paramval in enumerate(new_params):
                t.add(str(position + 1), paramval)
            if old_1:
                t.add(main_entry_param, old_1)
            else:
                errandmsg("WARNING: No 1= in %s" % origt)
            # Put remaining parameters in order.
            for name, value, showkey in carried:
                t.add(name, value, showkey=showkey, preserve_spacing=False)
            notes.append(
                "rename {{%s}} to {{%s|%s|%s={{{1}}}}}" %
                (tn, new_name, "|".join(new_params), main_entry_param))
        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))
    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Replace language-specific romanization templates with {{romanization of|LANG}}.

    `templates_to_generalize` maps a template name either to a language code
    or to a two-element list [language code, script code]. The language
    code becomes 1=, the script (if any) sc=, numbered params shift up by
    one, and lang= is dropped. Returns a tuple of (new page text, list of
    change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn in templates_to_generalize:
            lang_params = templates_to_generalize[tn]
            if type(lang_params) is list:
                lang, sc = lang_params
            else:
                lang, sc = lang_params, None

            # Fetch all params except lang=.
            carried = [
                (unicode(param.name), param.value, param.showkey)
                for param in t.params
                if unicode(param.name).strip() != "lang"
            ]
            # Erase all params.
            del t.params[:]

            t.add("1", lang)
            if sc:
                t.add("sc", sc)
            # Put remaining parameters in order, shifting numbered ones by
            # one to make room for the new 1=.
            for name, value, showkey in carried:
                if re.search("^[0-9]+$", name):
                    name = str(int(name) + 1)
                t.add(name, value, showkey=showkey, preserve_spacing=False)
            blib.set_template_name(t, "romanization of")
            sc_suffix = "|sc=%s" % sc if sc else ""
            notes.append("rename {{%s}} to {{romanization of|%s%s}}" %
                         (tn, lang, sc_suffix))
        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))
    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Convert {{uk-conj-manual}} templates to {{uk-conj-table}}.

    1= becomes aspect=, "_futr_" in parameter names becomes "_fut_", and
    each parameter whose name ends in "2" or "3" is merged, comma-separated,
    into the parameter of the same name without that final digit. Returns a
    tuple of (new page text, list of change notes).

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; in particular confirm that rmparam(t, pn) runs for every
    collected parameter and not only for the non-blank ones.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    notes = []
    pagemsg("Processing")
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn == "uk-conj-manual":
            aspect = getparam(t, "1")
            t.add("aspect", aspect, before="1", preserve_spacing=False)
            rmparam(t, "1")
            for param in t.params:
                pn = pname(param)
                if "_futr_" in pn:
                    param.name = pn.replace("_futr_", "_fut_")
            # Collect all "...2" params first, then all "...3" params, so
            # that the values are merged in that order below.
            to_fix = []
            for param in t.params:
                pn = pname(param)
                pv = unicode(param.value)
                if pn.endswith("2"):
                    to_fix.append((pn, pv))
            for param in t.params:
                pn = pname(param)
                pv = unicode(param.value)
                if pn.endswith("3"):
                    to_fix.append((pn, pv))
            for pn, pv in to_fix:
                # Blank values and dash placeholders are not merged.
                if pv.strip() and pv.strip() not in ["-", u"—"]:
                    existing = getparam(t, pn[:-1])
                    if not existing:
                        existing = pv
                    else:
                        # Insert ", VALUE" before any trailing whitespace of
                        # the existing value.
                        existing = re.sub(r"(\s*)$", r", %s\1" % pv.strip(),
                                          existing)
                    t.add(pn[:-1], existing, preserve_spacing=False)
                rmparam(t, pn)
            blib.set_template_name(t, "uk-conj-table")
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
            notes.append("convert {{%s}} to {{uk-conj-table}}" % tn)
    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Replace {{R:SAOL}}, {{R:SO}} and {{R:SAOB online}} with {{R:svenska.se|SOURCE}}.

    The source keyword ("saol", "so" or "saob") becomes 1=; the old 2= is
    dropped and the remaining numbered params shift up by one. Returns a
    tuple of (parsed wikicode object, list of change notes); note that the
    first element is not converted to a string here.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    source_keywords = {
        "R:SAOL": "saol",
        "R:SO": "so",
        "R:SAOB online": "saob",
    }

    pagemsg("Processing")
    notes = []
    for t in parsed.filter_templates():
        tn = tname(t)
        param1 = source_keywords.get(tn)
        if not param1:
            continue
        origt = unicode(t)
        rmparam(t, "2")
        # Fetch all params, renumbering the positional ones.
        shifted = []
        for param in t.params:
            rawname = unicode(param.name)
            stripped = rawname.strip()
            if re.search("^[0-9]+$", stripped):
                rawname = str(1 + int(stripped))
            shifted.append((rawname, param.value, param.showkey))
        # Erase all params.
        del t.params[:]
        t.add("1", param1)
        # Put back params in order.
        for name, value, showkey in shifted:
            t.add(name, value, showkey=showkey, preserve_spacing=False)
        blib.set_template_name(t, "R:svenska.se")
        notes.append("replace {{%s}} with {{R:svenska.se|%s}}" % (tn, param1))
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))
    return parsed, notes
def replace_in_reference(parsed, in_what):
    """Rename {{reference-*}} templates in `parsed` to their {{cite-*}} equivalents.

    {{reference-journal}} and {{reference-news}} become {{cite-journal}} and
    get fix_page_params() applied; {{reference-book}} becomes {{cite-book}}
    and gets fix_cite_book_params() applied. `in_what` is only used in log
    messages. Relies on `pagemsg`, `notes`, `set_template_name`,
    `fix_page_params` and `fix_cite_book_params` from an enclosing/global
    scope.
    """
    for t in parsed.filter_templates():
        rawname = unicode(t.name)
        stripped = rawname.strip()
        origt = unicode(t)
        if stripped in ["reference-journal", "reference-news"]:
            set_template_name(t, "cite-journal", rawname)
            pagemsg("%s -> cite-journal" % stripped)
            notes.append("%s -> cite-journal" % stripped)
            fix_page_params(t)
            pagemsg("Replacing %s with %s in %s" %
                    (origt, unicode(t), in_what))
        if stripped == "reference-book":
            set_template_name(t, "cite-book", rawname)
            pagemsg("reference-book -> cite-book")
            fixed_params = fix_cite_book_params(t)
            suffix = " and fix book cite params" if fixed_params else ""
            notes.append("reference-book -> cite-book%s" % suffix)
            pagemsg("Replacing %s with %s in %s" %
                    (origt, unicode(t), in_what))
def process_page(index, page, save, verbose):
    """Rename legacy reference/usenet citation templates on one page.

    The page text is split on section headers. Inside ==References==
    sections and inside <ref> tags, {{reference-journal}}/{{reference-news}}
    become {{cite-journal}} and {{reference-book}} becomes {{cite-book}}.
    Elsewhere they become {{quote-journal}} and {{quote-book}}; the templates
    listed in `simple_replace` are renamed, and {{cite-usenet}} and
    {{quote-usenet}} become {{quote-newsgroup}}. Parameters are renamed and
    cleaned up along the way.

    Args:
      index: page index, used only in log messages.
      page: page object; its text is read and, if `save` is true and
        anything changed, written back with `page.save()`.
      save: if true, save the changed page; otherwise only log what would
        be saved.
      verbose: if true, log the full old and new page text on change.

    Returns:
      None.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    if not page.exists():
        pagemsg("WARNING: Page doesn't exist")
        return

    if ":" in pagetitle and not re.search(
            "^(Citations|Appendix|Reconstruction|Transwiki|Talk|Wiktionary|[A-Za-z]+ talk):",
            pagetitle):
        pagemsg("WARNING: Colon in page title and not a recognized namespace to include, skipping page")
        return

    text = unicode(page.text)
    notes = []

    # Because of the capturing group, even indices hold section bodies and
    # odd indices hold the header line that precedes the following body.
    subsections = re.split("(^==.*==\n)", text, 0, re.M)

    def move_param(t, fr, to, frob_from=None):
        """Rename param `fr` to `to` in template `t`.

        A blank `fr` is simply removed. If `frob_from` is given, it is
        applied to the old value; a false or blank result aborts the move.
        Nothing is moved when `to` already has a non-blank value.
        """
        if t.has(fr):
            oldval = getparam(t, fr)
            if not oldval.strip():
                rmparam(t, fr)
                pagemsg("Removing blank param %s" % fr)
                return
            if frob_from:
                newval = frob_from(oldval)
                if not newval or not newval.strip():
                    return
            else:
                newval = oldval

            if getparam(t, to).strip():
                pagemsg("WARNING: Would replace %s= -> %s= but %s= is already present: %s"
                        % (fr, to, to, unicode(t)))
            elif oldval != newval:
                rmparam(t, to)  # in case of blank param
                # If either old or new name is a number, use remove/add to
                # automatically set the showkey value properly; else it's
                # safe to just change the name of the param, which will
                # preserve its location.
                if re.search("^[0-9]+$", fr) or re.search("^[0-9]+$", to):
                    rmparam(t, fr)
                    t.add(to, newval)
                else:
                    tfr = t.get(fr)
                    tfr.name = to
                    tfr.value = newval
                pagemsg("%s=%s -> %s=%s" % (fr, oldval.replace("\n", r"\n"), to,
                                            newval.replace("\n", r"\n")))
            else:
                rmparam(t, to)  # in case of blank param
                # See comment above.
                if re.search("^[0-9]+$", fr) or re.search("^[0-9]+$", to):
                    rmparam(t, fr)
                    t.add(to, newval)
                else:
                    t.get(fr).name = to
                pagemsg("%s -> %s" % (fr, to))

    def fix_page_params(t):
        """Clean up page=/pages= in `t`; return True if `t` changed."""
        origt = unicode(t)
        for param in ["page", "pages"]:
            pageval = getparam(t, param)
            if re.search(r"^\s*pp?\.\s*", pageval):
                pageval = re.sub(r"^(\s*)pp?\.\s*", r"\1", pageval)
                t.add(param, pageval)
                notes.append("remove p(p). from %s=" % param)
                pagemsg("remove p(p). from %s=" % param)
        # A single number belongs in page=, not pages=.
        if re.search(r"^[0-9]+$", getparam(t, "pages").strip()):
            move_param(t, "pages", "page")
        # NOTE(review): this matches only a number followed by a trailing
        # dash (e.g. "12-"), not a full range such as "12-15" -- confirm
        # that is intended.
        if re.search(r"^[0-9]+[-–—]$", getparam(t, "page").strip()):
            move_param(t, "page", "pages")
        return origt != unicode(t)

    def fix_cite_book_params(t):
        """Rename legacy book params in `t`; return True if `t` changed."""
        origt = unicode(t)
        if getparam(t, "origyear").strip() and getparam(t, "year").strip():
            if getparam(t, "year_published"):
                pagemsg("WARNING: Would set year_published= but is already present: %s"
                        % unicode(t))
            else:
                rmparam(t, "year_published")  # in case of blank param
                t.get("year").name = "year_published"
                t.get("origyear").name = "year"
                pagemsg("year -> year_published, origyear -> year")
        move_param(t, "origdate", "date")
        move_param(t, "origmonth", "month")

        def frob_isbn(idval):
            # Strip a leading "ISBN"-style label; accept bare values that
            # start with a digit; otherwise warn and refuse the move.
            isbn_re = r"^(\s*)(10-ISBN +|ISBN-13 +|ISBN:? +|ISBN[-=] *)"
            if re.search(isbn_re, idval, re.I):
                return re.sub(isbn_re, r"\1", idval, 0, re.I)
            elif re.search(r"^[0-9]", idval.strip()):
                return idval
            else:
                pagemsg("WARNING: Would replace id= -> isbn= but id=%s doesn't begin with 'ISBN '"
                        % idval.replace("\n", r"\n"))
                return None

        move_param(t, "id", "isbn", frob_isbn)
        fix_page_params(t)
        return origt != unicode(t)

    def fix_cite_usenet_params(t):
        """Rename {{cite-usenet}} params in `t`; return True if `t` changed."""
        origt = unicode(t)
        move_param(t, "group", "newsgroup")
        move_param(t, "link", "url")
        return origt != unicode(t)

    def fix_quote_usenet_params(t):
        """Rename {{quote-usenet}} params in `t`; return True if `t` changed."""
        origt = unicode(t)
        monthday = getparam(t, "monthday").strip()
        year = getparam(t, "year").strip()
        if monthday and year:
            if getparam(t, "date"):
                pagemsg("WARNING: Would set date= but is already present: %s"
                        % unicode(t))
            else:
                rmparam(t, "date")  # in case of blank param
                param = t.get("monthday")
                param.name = "date"
                # Numeric "M/D" gets the year appended with a slash;
                # anything else gets it appended after a space.
                if re.search("^[0-9]+/[0-9]+$", monthday):
                    param.value = "%s/%s" % (monthday, year)
                else:
                    param.value = "%s %s" % (monthday, year)
                rmparam(t, "year")
                pagemsg("monthday/year -> date")
        move_param(t, "group", "newsgroup")
        move_param(t, "text", "passage")
        # Positional params are converted highest-numbered first.
        move_param(t, "6", "passage")
        move_param(t, "5", "url")
        move_param(t, "4", "newsgroup")
        move_param(t, "3", "title")
        move_param(t, "2", "author")
        move_param(t, "1", "date")
        return origt != unicode(t)

    def replace_in_reference(parsed, in_what):
        """Rename reference-* templates in `parsed` to cite-* templates."""
        for t in parsed.filter_templates():
            tname = unicode(t.name)
            origt = unicode(t)
            if tname.strip() in ["reference-journal", "reference-news"]:
                set_template_name(t, "cite-journal", tname)
                pagemsg("%s -> cite-journal" % tname.strip())
                notes.append("%s -> cite-journal" % tname.strip())
                fix_page_params(t)
                pagemsg("Replacing %s with %s in %s" %
                        (origt, unicode(t), in_what))
            if tname.strip() == "reference-book":
                set_template_name(t, "cite-book", tname)
                pagemsg("reference-book -> cite-book")
                fixed_params = fix_cite_book_params(t)
                notes.append("reference-book -> cite-book%s" % (
                    " and fix book cite params" if fixed_params else ""))
                pagemsg("Replacing %s with %s in %s" %
                        (origt, unicode(t), in_what))

    for j in xrange(0, len(subsections), 2):
        parsed = blib.parse_text(subsections[j])
        if j > 0 and re.search(r"^===*References===*\n", subsections[j-1]):
            replace_in_reference(parsed, "==References== section")
            subsections[j] = unicode(parsed)
        else:
            # <ref> tags get the cite-* treatment even outside of a
            # References section.
            for t in parsed.filter_tags():
                if unicode(t.tag) == "ref":
                    tagparsed = mw.wikicode.Wikicode([t])
                    replace_in_reference(tagparsed, "<ref>")
            subsections[j] = unicode(parsed)

            need_to_replace_double_quote_prefixes = False
            for t in parsed.filter_templates():
                tname = unicode(t.name)
                origt = unicode(t)
                for fr, to in simple_replace:
                    if tname.strip() == fr:
                        set_template_name(t, to, tname)
                        pagemsg("%s -> %s" % (fr, to))
                        notes.append("%s -> %s" % (fr, to))
                        fix_page_params(t)
                        pagemsg("Replacing %s with %s" % (origt, unicode(t)))
                if tname.strip() in ["reference-journal", "reference-news"]:
                    set_template_name(t, "quote-journal", tname)
                    pagemsg("%s -> quote-journal" % tname.strip())
                    notes.append("%s -> quote-journal" % tname.strip())
                    fix_page_params(t)
                    pagemsg("Replacing %s with %s outside of reference section"
                            % (origt, unicode(t)))
                if tname.strip() == "reference-book":
                    set_template_name(t, "quote-book", tname)
                    # The log line and edit-summary note name quote-book,
                    # the template actually substituted in this branch.
                    pagemsg("reference-book -> quote-book")
                    fixed_params = fix_cite_book_params(t)
                    notes.append("reference-book -> quote-book%s" % (
                        " and fix book cite params" if fixed_params else ""))
                    pagemsg("Replacing %s with %s outside of reference section"
                            % (origt, unicode(t)))
                if tname.strip() in ["cite-usenet", "quote-usenet"]:
                    if tname.strip() == "cite-usenet":
                        fixed_params = fix_cite_usenet_params(t)
                    else:
                        fixed_params = fix_quote_usenet_params(t)
                    set_template_name(t, "quote-newsgroup", tname)
                    pagemsg("%s -> quote-newsgroup" % tname.strip())
                    prefix = getparam(t, "prefix").strip()
                    removed_prefix = False
                    if prefix:
                        if prefix in ["#", "#*"]:
                            # The list marker moves out of the template and
                            # into the wikitext in front of it.
                            parsed.insert_before(t, "#* ")
                            rmparam(t, "prefix")
                            pagemsg("remove prefix=%s, insert #* before template"
                                    % prefix)
                            need_to_replace_double_quote_prefixes = True
                            removed_prefix = True
                        else:
                            pagemsg("WARNING: Found prefix=%s, not # or #*: %s"
                                    % (prefix, unicode(t)))
                    if removed_prefix:
                        prefix_note = (
                            ", remove prefix=%s, insert #* before template"
                            % prefix)
                    else:
                        prefix_note = ""
                    notes.append("%s -> quote-newsgroup%s%s" % (
                        tname.strip(), prefix_note,
                        ", fix params" if fixed_params else ""))
                    pagemsg("Replacing %s with %s" % (origt, unicode(t)))
            subsections[j] = unicode(parsed)

            if need_to_replace_double_quote_prefixes:
                # Inserting "#* " where the line already began with "#* "
                # leaves a doubled marker; collapse it.
                newval = re.sub(r"^#\* #\* ", "#* ", subsections[j], 0, re.M)
                if newval != subsections[j]:
                    notes.append("remove double #* prefix")
                    pagemsg("Removed double #* prefix")
                subsections[j] = newval

    newtext = "".join(subsections)

    if text != newtext:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, newtext))
        # Every code path that changes the text also records a note.
        assert notes
        comment = "; ".join(blib.group_notes(notes))
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = newtext
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)