def fix_quote_usenet_params(t):
    """Modernize the parameters of a Usenet quotation template in place.

    Merges monthday=/year= into date= (unless a non-blank date= already
    exists), then renames legacy named and positional parameters through
    move_param().  Returns True if the template's text changed.
    """
    before = unicode(t)
    md = getparam(t, "monthday").strip()
    yr = getparam(t, "year").strip()
    if md and yr:
        if getparam(t, "date"):
            pagemsg("WARNING: Would set date= but is already present: %s" % unicode(t))
        else:
            # date= may be present but blank; remove it before reusing the
            # monthday= slot as date=.
            rmparam(t, "date")
            date_param = t.get("monthday")
            date_param.name = "date"
            # A purely numeric month/day is joined to the year with a slash,
            # anything else with a space.
            numeric = re.search("^[0-9]+/[0-9]+$", md)
            date_param.value = ("%s/%s" if numeric else "%s %s") % (md, yr)
            rmparam(t, "year")
            pagemsg("monthday/year -> date")
    # Order matters: it mirrors the original sequence of renames.
    renames = (
        ("group", "newsgroup"),
        ("text", "passage"),
        ("6", "passage"),
        ("5", "url"),
        ("4", "newsgroup"),
        ("3", "title"),
        ("2", "author"),
        ("1", "date"),
    )
    for old_name, new_name in renames:
        move_param(t, old_name, new_name)
    return before != unicode(t)
def process_page(page, index, parsed):
    """Convert head= to 1= in {{ru-phrase}} templates on one page.

    The incoming `parsed` argument is ignored; the page is re-parsed with
    blib.parse().  Returns (new page text, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-phrase":
            continue
        if t.has("tr"):
            pagemsg("WARNING: Has tr=: %s" % unicode(t))
        if not t.has("head"):
            continue
        if t.has("1"):
            pagemsg("WARNING: Has both head= and 1=: %s" % unicode(t))
            continue
        notes.append("ru-phrase: convert head= to 1=")
        before = unicode(t)
        headval = getparam(t, "head")
        rmparam(t, "head")
        # tr= is removed and re-added after 1= so it ends up following it.
        translit = getparam(t, "tr")
        rmparam(t, "tr")
        t.add("1", headval)
        if translit:
            t.add("tr", translit)
        pagemsg("Replacing %s with %s" % (before, unicode(t)))
    return unicode(parsed), notes
def process_text_on_page(index, pagetitle, text):
    """Remove a redundant 1=<pagetitle> from es-IPA/fr-IPA/it-IPA templates.

    Templates that have any of the parameters 2 through 10 are left alone.
    Returns None when none of the template names occur in `text`, otherwise
    (new text, list of change notes).
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    ipa_templates = ("es-IPA", "fr-IPA", "it-IPA")
    notes = []
    # Cheap substring test to avoid parsing pages that cannot match.
    if not any(name in text for name in ipa_templates):
        return
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        before = unicode(t)
        if tn in ipa_templates:
            extra = next(
                (i for i in xrange(2, 11) if getparam(t, str(i))), None)
            if extra is not None:
                pagemsg("Template has %s=, not touching: %s" % (extra, before))
                continue
            first = getparam(t, "1")
            if first == pagetitle:
                rmparam(t, "1")
                notes.append("remove redundant 1=%s from {{%s}}" % (first, tn))
        if unicode(t) != before:
            pagemsg("Replaced %s with %s" % (before, unicode(t)))
    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Disabled converter of ru-conj class-6 verbs to the 6-degree notation.

    The function logs a warning and returns None immediately; everything
    after the early return is unreachable and kept only for reference.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- Unreachable from here on ----
    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        conjtype = getparam(t, "1")
        if unicode(t.name) in ["ru-conj"]:
            if re.search(r"^6[ac]", conjtype):
                if getparam(t, "no_iotation"):
                    rmparam(t, "no_iotation")
                    if conjtype.startswith("6a"):
                        notes.append(u"6a + no_iotation -> 6°a")
                    else:
                        notes.append(u"6c + no_iotation -> 6°c")
                    t.add("1", re.sub("^6", u"6°", conjtype))
            elif re.search(r"^6b", conjtype):
                notes.append(u"6b -> 6°b")
                t.add("1", re.sub("^6", u"6°", conjtype))
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))
    return unicode(parsed), notes
def process_page(index, page, save, verbose):
    """Tally the Russian headword templates found on one page.

    Increments the per-category and overall counters (cat_head_count,
    overall_head_count) for each recognized headword template, warns when a
    page has none, and calls output_heads_seen() every 100th index.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    parsed = blib.parse(page)
    saw_any_head = False
    for t in parsed.filter_templates():
        tn = unicode(t.name)
        headname = None
        if tn in ru_head_templates:
            headname = tn
        elif tn == "head" and getparam(t, "1") == "ru":
            headtype = getparam(t, "2")
            headname = "head|ru|%s" % headtype
            if headtype in ru_heads_to_warn_about:
                pagemsg("WARNING: Found %s" % headname)
        if headname is None:
            continue
        cat_head_count[headname] = cat_head_count.get(headname, 0) + 1
        overall_head_count[headname] = overall_head_count.get(headname, 0) + 1
        saw_any_head = True
    if not saw_any_head:
        pagemsg("WARNING: No head")
    if index % 100 == 0:
        output_heads_seen()
def canon_param(pagetitle, index, template, param, paramtr, translit_module,
                include_tempname_in_changelog=False):
    """Canonicalize a foreign-script parameter and its transliteration.

    `param` is either a single parameter name or a [from, to] list; the
    special source name "page title" takes the text from `pagetitle`.
    Returns False when there is no foreign text, otherwise the `actions`
    value produced by do_canon_param().
    """
    if isinstance(param, list):
        fromparam, toparam = param
    else:
        fromparam = toparam = param
    if fromparam == "page title":
        foreign = pagetitle
    else:
        foreign = getparam(template, fromparam)
    latin = getparam(template, paramtr)
    if not foreign:
        return False
    canonforeign, canonlatin, actions = do_canon_param(
        pagetitle, index, template, fromparam, toparam, paramtr, foreign,
        latin, translit_module, include_tempname_in_changelog)
    oldtempl = "%s" % unicode(template)
    if canonforeign:
        addparam(template, toparam, canonforeign)
    # canonlatin equal to True means "remove the transliteration"; any other
    # truthy value is the replacement transliteration.
    if canonlatin == True:
        template.remove(paramtr)
    elif canonlatin:
        addparam(template, paramtr, canonlatin)
    if canonforeign or canonlatin:
        msg("Page %s %s: Replaced %s with %s" %
            (index, pagetitle, oldtempl, unicode(template)))
    return actions
def process_text_on_page_for_full_conj(index, pagename, text, verbs):
    """Install the conjugation spec from `verbs` into a page's Spanish verb headword.

    {{es-verb}} templates carrying attn= get the spec as 1=; a
    {{head|es|verb}} template is converted into {{es-verb}}.  A spec equal
    to the default linked form is blanked, and one that differs from it only
    inside <...> has its links removed.  Returns None when the page has no
    entry in `verbs`, otherwise (new text, list of change notes).
    """
    global args

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")
    notes = []
    if pagename not in verbs:
        pagemsg("WARNING: Can't find entry, skipping")
        return
    entry = verbs[pagename]
    origentry = entry

    # Default spec: first word takes the <> marker, remaining words are linked.
    first, rest = pagename.split(" ", 1)
    linked_rest = " ".join("[[%s]]" % word for word in rest.split(" "))
    def_link = "%s<> %s" % (first, linked_rest)
    if def_link == entry:
        pagemsg("Replacing entry '%s' with a blank entry because it's the default" % entry)
        entry = ""
    elif re.sub("<.*?>", "<>", entry) == def_link:
        unlinked = blib.remove_links(entry)
        pagemsg("Replacing entry '%s' with entry without links '%s'" % (entry, unlinked))
        entry = unlinked

    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        before = unicode(t)
        if tn == "es-verb":
            if not getparam(t, "attn"):
                pagemsg("Didn't see attn=1: %s" % unicode(t))
                continue
            rmparam(t, "attn")
            if entry:
                t.add("1", entry)
                notes.append("add conjugation '%s' to Spanish verb" % entry)
            else:
                notes.append("add conjugation (default) to Spanish verb")
        if tn == "head" and getparam(t, "1") == "es" and getparam(t, "2") == "verb":
            head = getparam(t, "head")
            if head:
                pagemsg("WARNING: Removing head=%s compared with entry '%s', original entry '%s': %s" %
                        (head, entry, origentry, unicode(t)))
            for stale in ("head", "2", "1"):
                rmparam(t, stale)
            blib.set_template_name(t, "es-verb")
            if entry:
                t.add("1", entry)
                notes.append("convert {{head|es|verb}} to {{es-verb|%s}}" % entry)
            else:
                notes.append("convert {{head|es|verb}} to {{es-verb}}")
        if before != unicode(t):
            pagemsg("Replaced %s with %s" % (before, unicode(t)))
    return unicode(parsed), notes
def canonicalize_one_page_verb_form(page, index, text):
    """Canonicalize the verb-form argument of every `tempname` template.

    `tempname` and `formarg` come from the enclosing scope.  `text` is the
    parsed page and is modified in place.  Returns (text, changelog string).
    """
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    actions_taken = []
    for template in text.filter_templates():
        if not (template.name == tempname):
            continue
        before = unicode(template)
        form = getparam(template, formarg)
        if form:
            addparam(template, formarg, canonicalize_form(form))
        after = unicode(template)
        if before == after:
            continue
        msg("Replacing %s with %s" % (before, after))
        if re.match("^[1I](-|$)", form):
            # Form I: also record the two arguments following the form.
            base = int(formarg)
            actions_taken.append("form=%s (%s/%s)" % (
                form,
                getparam(template, str(1 + base)),
                getparam(template, str(2 + base))))
        else:
            actions_taken.append("form=%s" % form)
    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
        tempname, formarg, '; '.join(actions_taken))
    if actions_taken:
        msg("Change log = %s" % changelog)
    return text, changelog
def do_comparative_superlative_of(pos, existing_t, should_end):
    """Validate a {{<pos> of}} template and fix up the matching head template.

    Relies on t, origt, headt, pagetitle, notes and pagemsg from the
    enclosing scope.  Returns False (after logging a warning) when a check
    fails; otherwise sets 2= of the head template to "<pos> adjective" if
    needed and returns the current template `t`.
    """
    if getparam(t, "1") != "de":
        pagemsg("WARNING: Saw wrong language in {{%s of}}, skipping: %s" %
                (pos, origt))
        return False
    if existing_t:
        pagemsg("WARNING: Saw two {{%s of}} templates, skipping: %s and %s" %
                (pos, unicode(existing_t), origt))
        return False
    if not headt:
        pagemsg("WARNING: Saw {{%s of}} without head template, skipping: %s" %
                (pos, origt))
        return False
    if not pagetitle.endswith(should_end):
        pagemsg("WARNING: Incorrect ending for %s, should be -%s, skipping" %
                (pos, should_end))
        return False
    wanted_pos = "%s adjective" % pos
    current_pos = getparam(headt, "2")
    if current_pos != wanted_pos:
        headt.add("2", wanted_pos)
        notes.append("convert {{head|de|%s}} to {{head|de|%s adjective}}" %
                     (current_pos, pos))
    return t
def process_page(page, index, parsed):
    """Strip "-refl" from the verb-type argument of Russian conjugation templates.

    For the ru-conj family the verb type is 1=; for {{temp|ru-conj|...}} it
    is 2=.  Returns (parsed, list of change notes); note that the parsed
    object itself is returned, not its text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errpagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))
        errmsg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    conj_templates = (
        "ru-conj", "ru-conj-old",
        "User:Benwing2/ru-conj", "User:Benwing2/ru-conj-old",
    )
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        type_arg = None
        if tname(t) in conj_templates:
            type_arg = "1"
        elif tname(t) == "temp" and getparam(t, "1") == "ru-conj":
            type_arg = "2"
        if type_arg is not None:
            t.add(type_arg, getparam(t, type_arg).replace("-refl", ""))
        after = unicode(t)
        if before != after:
            notes.append("remove -refl from verb type")
            pagemsg("Replaced %s with %s" % (before, after))
    return parsed, notes
def find_head_comp_sup(pagetitle, pagemsg):
    """Return (head, comparative, superlative) from the first {{la-adv}} on a page.

    Explicit comp=/2= and sup=/3= win.  Whichever is missing is derived from
    the headword by stripping a recognized adverb ending and adding "ius" /
    "issimē".  If the ending is not recognized a warning is logged and the
    missing values are returned empty.  Returns (None, None, None) when the
    page has no {{la-adv}}.
    """
    page = pywikibot.Page(site, pagetitle)
    parsed = blib.parse_text(unicode(page.text))
    # Longer endings come first so they are tried before their own suffixes.
    endings = ["iter", "nter", "ter", "er", u"iē", u"ē", "im", u"ō"]
    for t in parsed.filter_templates():
        if tname(t) != "la-adv":
            continue
        head = getparam(t, "1")
        comp = getparam(t, "comp") or getparam(t, "2")
        sup = getparam(t, "sup") or getparam(t, "3")
        if comp and sup:
            return head, comp, sup
        for ending in endings:
            matched = re.search("^(.*?)%s$" % ending, head)
            if not matched:
                continue
            stem = matched.group(1)
            if ending == "nter":
                # Adverbs in -nter keep the -nt- of the stem.
                stem += "nt"
            default_comp = stem + "ius"
            default_sup = stem + u"issimē"
            break
        else:
            pagemsg("WARNING: Didn't recognize ending of adverb headword %s" % head)
            return head, comp, sup
        return head, comp or default_comp, sup or default_sup
    return None, None, None
def process_page(page, index, parsed):
    """Replace grave accents with acute accents in Bulgarian headwords.

    Looks at 1= of the bg-* headword templates and head= of {{head|bg}}.
    The value is decomposed with bglib.decompose() before GR is replaced by
    AC.  The incoming `parsed` is ignored; the page text is re-parsed.
    Returns (new text, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    bg_headword_templates = (
        "bg-noun", "bg-proper noun", "bg-verb", "bg-adj", "bg-adv",
        "bg-part", "bg-part form", "bg-verbal noun", "bg-verbal noun form",
        "bg-phrase",
    )
    notes = []
    text = unicode(page.text)
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        before = unicode(t)
        if tn in bg_headword_templates:
            target = "1"
        elif tn == "head" and getparam(t, "1") == "bg":
            target = "head"
        else:
            target = None
        if target:
            decomposed = bglib.decompose(getparam(t, target))
            if GR in decomposed:
                t.add(target, decomposed.replace(GR, AC))
                notes.append("convert grave to acute in {{%s}}" % tn)
        if unicode(t) != before:
            pagemsg("Replaced %s with %s" % (before, unicode(t)))
    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Move Latin second-declension stems in -i to the -ius/-ium templates.

    {{la-decl-2nd}} becomes {{la-decl-2nd-ius}} and {{la-decl-2nd-N}} becomes
    {{la-decl-2nd-N-ium}}, with the final "i" dropped from 1=.  Templates
    whose stem does not end in "i" only produce a warning.
    Returns (new text, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    # old name -> (new name, change note, warning format)
    conversions = {
        "la-decl-2nd": (
            "la-decl-2nd-ius",
            "Fix noun in -ius to use {{la-decl-2nd-ius}}",
            "WARNING: Found la-decl-2nd without stem in -i: %s",
        ),
        "la-decl-2nd-N": (
            "la-decl-2nd-N-ium",
            "Fix noun in -ium to use {{la-decl-2nd-N-ium}}",
            "WARNING: Found la-decl-2nd-N without stem in -i: %s",
        ),
    }
    notes = []
    for t in parsed.filter_templates():
        tn = tname(t)
        if tn not in conversions:
            continue
        new_name, note, warning = conversions[tn]
        stem = getparam(t, "1")
        if stem.endswith("i"):
            blib.set_template_name(t, new_name)
            t.add("1", stem[:-1])
            notes.append(note)
        else:
            pagemsg(warning % unicode(t))
    return unicode(parsed), notes
def do_one_page_verb(page, index, text):
    """Remove i3rab (case endings) from the vn= of every {{ar-conj}} on a page.

    `text` is the parsed page and is modified in place.  A trailing "?" on
    vn= marks the verbal nouns as uncertain; it is stripped before the
    individual nouns are processed and re-appended afterwards.

    Returns (text, changelog string naming the verbs that were changed).
    """
    pagename = page.title()
    verbcount = 0
    verbids = []
    for template in text.filter_templates():
        if not (template.name == "ar-conj"):
            continue
        verbcount += 1
        origvn = getparam(template, "vn")
        uncertain = origvn.endswith("?")
        vnvalue = origvn[:-1] if uncertain else origvn
        if uncertain:
            msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
                index, pagename))
        if not vnvalue:
            continue
        # Verbal nouns may be separated by a Latin or an Arabic comma.
        vns = re.split(u"[,،]", vnvalue)
        form = getparam(template, "1")
        verbid = "#%s form %s" % (verbcount, form)
        if re.match("^[1I](-|$)", form):
            verbid += " (%s,%s)" % (getparam(template, "2"),
                                    getparam(template, "3"))
        newvn = ",".join(
            remove_i3rab(pagename, index, verbid, vn) for vn in vns)
        if uncertain:
            newvn += "?"
        # Compare against the untouched parameter value.  Comparing against
        # the value with "?" stripped would flag every uncertain verbal noun
        # as changed, since "?" has just been re-appended to newvn.
        if newvn != origvn:
            msg("Page %s %s: Verb %s, replacing %s with %s" % (
                index, pagename, verbid, origvn, newvn))
            addparam(template, "vn", newvn)
            verbids.append(verbid)
    return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
        ', '.join(verbids))
def undo_one_page_greek_removal(page, index, text):
    """Put back a parameter that was removed from a Greek template.

    `template_text` and `removed_param` come from the enclosing scope.  The
    "to" form is the template with any sc=polytonic removed; the "from" form
    additionally lacks `removed_param`.  Every occurrence of the "from" text
    is replaced by the "to" text.  Returns (new text, changelog), where the
    changelog is empty if nothing was replaced.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))

    template = blib.parse_text(template_text).filter_templates()[0]
    orig_template = unicode(template)
    if getparam(template, "sc") == "polytonic":
        template.remove("sc")
    to_template = unicode(template)
    param_value = getparam(template, removed_param)
    template.remove(removed_param)
    from_template = unicode(template)

    text = unicode(text)
    had_original = orig_template in text
    newtext = text.replace(from_template, to_template)
    if newtext == text:
        if had_original:
            pagemsg("Original template found, taking no action")
        else:
            pagemsg("WARNING: Unable to locate 'from' template when undoing Greek param removal: %s" % from_template)
        return newtext, ""

    if had_original:
        pagemsg("WARNING: Undid removal, but original template %s already present!" % orig_template)
    # A single replacement changes the length by exactly the difference
    # between the two template strings; anything else means several matches.
    expected_growth = len(to_template) - len(from_template)
    if len(newtext) - len(text) != expected_growth:
        pagemsg("WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s" % (
            from_template, to_template))
    changelog = "Undid removal of %s=%s in %s" % (removed_param, param_value, to_template)
    pagemsg("Change log = %s" % changelog)
    return newtext, changelog
def rewrite_one_page_verb_headword(page, index, text):
    """Turn form= of {{ar-verb}} into a canonicalized first positional argument.

    Existing numbered parameters 1 through 10 are shifted up by one to make
    room.  `text` is the parsed page and is modified in place.
    Returns (text, changelog string).
    """
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    actions_taken = []
    for template in text.filter_templates():
        if template.name not in ["ar-verb"]:
            continue
        before = unicode(template)
        form = getparam(template, "form")
        if form:
            # Renumber in place, highest first, so the parameters keep their
            # order and no number is overwritten before it has been moved.
            for pno in xrange(10, 0, -1):
                old_name = str(pno)
                if template.has(old_name):
                    template.get(old_name).name = str(pno + 1)
            # Re-insert form= in front of whatever parameter is now first ...
            template.remove("form")
            first_name = template.params[0].name if template.params else None
            addparam(template, "form", canonicalize_form(form),
                     before=first_name)
            # ... then rename it to 1= and hide the key.
            template.get("form").name = "1"
            template.get("1").showkey = False
        after = unicode(template)
        if before == after:
            continue
        msg("Replacing %s with %s" % (before, after))
        if re.match("^[1I](-|$)", form):
            actions_taken.append("form=%s (%s/%s)" % (
                form, getparam(template, "2"), getparam(template, "3")))
        else:
            actions_taken.append("form=%s" % form)
    changelog = "ar-verb: form= -> 1= and canonicalize to Roman numerals, move other params up: %s" % '; '.join(actions_taken)
    if actions_taken:
        msg("Change log = %s" % changelog)
    return text, changelog
def fix_cite_book_params(t):
    """Modernize the parameters of a book-citation template in place.

    When both origyear= and year= are non-blank, year= becomes
    year_published= and origyear= becomes year= (unless year_published= is
    already set).  origdate=/origmonth= are renamed, id= is moved to isbn=
    after dropping a leading "ISBN" label, and fix_page_params() is applied.
    Returns True if the template's text changed.
    """
    before = unicode(t)
    have_origyear = getparam(t, "origyear").strip()
    if have_origyear and getparam(t, "year").strip():
        if getparam(t, "year_published"):
            pagemsg("WARNING: Would set year_published= but is already present: %s" % unicode(t))
        else:
            # year_published= may be present but blank; remove it first.
            rmparam(t, "year_published")
            t.get("year").name = "year_published"
            t.get("origyear").name = "year"
            pagemsg("year -> year_published, origyear -> year")
    move_param(t, "origdate", "date")
    move_param(t, "origmonth", "month")

    def frob_isbn(idval):
        # Returns the id value without its ISBN label, the value unchanged if
        # it starts with a digit, or None (after a warning) otherwise.
        isbn_re = r"^(\s*)(10-ISBN +|ISBN-13 +|ISBN:? +|ISBN[-=] *)"
        if re.search(isbn_re, idval, re.I):
            return re.sub(isbn_re, r"\1", idval, 0, re.I)
        if re.search(r"^[0-9]", idval.strip()):
            return idval
        pagemsg("WARNING: Would replace id= -> isbn= but id=%s doesn't begin with 'ISBN '" % idval.replace("\n", r"\n"))
        return None

    move_param(t, "id", "isbn", frob_isbn)
    fix_page_params(t)
    return before != unicode(t)
def sub_template(val):
    """Substitute {{{1}}}, {{{2}}} and {{{pp|default}}} in `val`.

    The values come from `template` in the enclosing scope.  {{{pp|...}}}
    falls back to its inline default when the template has no pp= value.

    Replacements are passed to re.sub() as functions rather than strings so
    the parameter values are inserted literally.  As a plain replacement
    string, a backslash or group reference inside a value would be
    interpreted by the regex engine (or raise an error).
    """
    arg1 = getparam(template, "1")
    arg2 = getparam(template, "2")
    val = re.sub(r"\{\{\{1\|?\}\}\}", lambda m: arg1, val)
    val = re.sub(r"\{\{\{2\|?\}\}\}", lambda m: arg2, val)
    val = re.sub(r"\{\{\{pp\|(.*?)\}\}\}",
                 lambda m: getparam(template, "pp") or m.group(1), val)
    return val
def replace_spenser_fq(m):
    """Regex callback: rewrite {{RQ:Spenser FQ}} plus its quote text.

    `m` has two groups, the template text and the following quote text.
    Positional 2= and 1= become canto= and book= as Roman numerals, the
    quote text becomes passage=, and the template is renamed.  If a number
    cannot be converted the original match is returned unchanged.  Appends
    to `notes` from the enclosing scope.
    """
    template, text = m.groups()
    parsed = blib.parse_text(template)
    t = list(parsed.filter_templates())[0]
    # 2= is handled before 1=, matching the original order of edits.
    for argname, newname in (("2", "canto"), ("1", "book")):
        arabic = getparam(t, argname)
        if not arabic:
            continue
        roman = arabic_to_roman(arabic)
        if not roman:
            return m.group(0)
        t.add(newname, roman, before=argname)
        rmparam(t, argname)
    # Line breaks become " / " and a wrapping {{quote|en|...}} is unwrapped.
    text = re.sub(r"\s*<br */?>\s*", " / ", text)
    text = re.sub(r"^\{\{quote\|en\|(.*)\}\}$", r"\1", text)
    t.add("passage", text)
    blib.set_template_name(t, "RQ:Spenser Faerie Queene")
    notes.append(
        "reformat {{RQ:Spenser FQ}} into {{RQ:Spenser Faerie Queene}}")
    return unicode(t) + "\n"
def canon_param(pagetitle, index, template, param, paramtr,
                include_tempname_in_changelog=False):
    """Canonicalize an Arabic parameter and its transliteration parameter.

    `param` is either one parameter name or a [from, to] list; the source
    name "page title" means the Arabic text is `pagetitle` itself.
    Returns False when there is no Arabic text, otherwise the `actions`
    value produced by do_canon_param().
    """
    fromparam, toparam = param if isinstance(param, list) else (param, param)
    if fromparam == "page title":
        arabic = pagetitle
    else:
        arabic = getparam(template, fromparam)
    latin = getparam(template, paramtr)
    if not arabic:
        return False
    canonarabic, canonlatin, actions = do_canon_param(
        pagetitle, index, template, fromparam, toparam, paramtr, arabic,
        latin, include_tempname_in_changelog)
    oldtempl = "%s" % unicode(template)
    if canonarabic:
        addparam(template, toparam, canonarabic)
    # canonlatin equal to True requests removal of the transliteration; any
    # other truthy value replaces it.
    if canonlatin == True:
        template.remove(paramtr)
    elif canonlatin:
        addparam(template, paramtr, canonlatin)
    changed = canonarabic or canonlatin
    if changed:
        msg("Page %s %s: Replaced %s with %s" %
            (index, pagetitle, oldtempl, unicode(template)))
    return actions
def canon_param(pagetitle, index, template, lang, param, paramtr,
                translit_module):
    """Canonicalize a foreign-script parameter and its transliteration for `lang`.

    `param` is either one parameter name or a [from, to] list; the source
    name "page title" means the foreign text is `pagetitle` itself.  The
    canonical foreign text is stored via add_param_handling_head().
    Returns False when there is no foreign text, otherwise the `actions`
    value produced by do_canon_param().
    """
    if isinstance(param, list):
        fromparam, toparam = param
    else:
        fromparam, toparam = param, param
    use_title = fromparam == "page title"
    foreign = pagetitle if use_title else getparam(template, fromparam)
    latin = getparam(template, paramtr)
    if not foreign:
        return False
    canonforeign, canonlatin, actions = do_canon_param(
        pagetitle, index, template, lang, fromparam, toparam, paramtr,
        foreign, latin, translit_module)
    oldtempl = "%s" % unicode(template)
    if canonforeign:
        add_param_handling_head(template, toparam, canonforeign)
    # canonlatin equal to True requests removal of the transliteration; any
    # other truthy value replaces it.
    if canonlatin == True:
        template.remove(paramtr)
    elif canonlatin:
        addparam(template, paramtr, canonlatin)
    if canonforeign or canonlatin:
        msg("Page %s %s: Replaced %s with %s" %
            (index, pagetitle, oldtempl, unicode(template)))
    return actions
def process_page(page, index, parsed):
    """Move head= to 1= in {{ang-adj}} templates.

    Warns about {{head|ang|adjective(s)}} and about {{ang-adj}} that already
    has 1=.  The incoming `parsed` is ignored; the page text is re-parsed.
    Returns (parsed, list of change notes); note that the parsed object
    itself is returned, not its text.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    text = unicode(page.text)
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        before = unicode(t)
        is_raw_adj_head = (
            tn == "head" and getparam(t, "1") == "ang" and
            getparam(t, "2") in ["adjective", "adjectives"])
        if is_raw_adj_head:
            pagemsg("WARNING: {{head}} for adjectives, should not occur: %s" % unicode(t))
        elif tn == "ang-adj":
            if getparam(t, "1"):
                pagemsg("WARNING: 1= in ang-adj, should not occur: %s" % unicode(t))
            else:
                headval = getparam(t, "head")
                # Removed even when blank, so an empty head= does not linger.
                rmparam(t, "head")
                if headval:
                    t.add("1", headval)
                    notes.append("move head= to 1= in {{ang-adj}}")
        if unicode(t) != before:
            pagemsg("Replaced %s with %s" % (before, unicode(t)))
    return parsed, notes
def replace_trans(m, newlangcode, newlangname):
    """Regex callback: retarget "ku" translation templates to a new language.

    `m` has two groups, a prefix and the translation text.  Templates listed
    in `trans_templates` with 1=ku get 1=`newlangcode` and lose sc=.
    {{t-simple|ku|langname=Kurdish}} gets the new code and `newlangname`;
    a t-simple|ku whose langname= is not "Kurdish" is left alone with a
    warning.  Uses pagemsg, notes and trans_templates from the enclosing
    scope.  Returns the prefix followed by the rewritten translation text.
    """
    prefix, transtext = m.groups()
    parsed = blib.parse_text(transtext)
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn in trans_templates:
            if getparam(t, "1") == "ku":
                t.add("1", newlangcode)
                rmparam(t, "sc")
                pagemsg(
                    "Replaced %s with %s based on language prefix of translation entry"
                    % (origt, unicode(t)))
                notes.append(
                    "{{%s|ku}} -> {{%s|%s}} based on language prefix of translation entry"
                    % (tn, tn, newlangcode))
        elif tn == "t-simple":
            if getparam(t, "1") == "ku":
                # Compare the parameter's value with "Kurdish".  The closing
                # parenthesis used to sit after the comparison, which passed
                # a boolean to getparam() as the parameter name.
                if getparam(t, "langname") != "Kurdish":
                    pagemsg(
                        "WARNING: Something wrong, t-simple|ku without langname=Kurdish: %s"
                        % unicode(t))
                else:
                    t.add("1", newlangcode)
                    t.add("langname", newlangname)
                    pagemsg("Replaced %s with %s based on prefix" %
                            (origt, unicode(t)))
                    notes.append(
                        "{{t-simple|ku|langname=Kurdish}} -> {{t-simple|%s|langname=%s}} based on language prefix"
                        % (newlangcode, newlangname))
    transtext = unicode(parsed)
    return prefix + transtext
def process_page(page, index, parsed):
    """Clean up the parameters of {{R:Lexico}} templates.

    Removes lang= and renames entry_uk= to entry=, url_uk= to url= and 4= to
    text=, keeping each value at its old position.  Returns (parsed, list of
    change notes); note that the parsed object itself is returned.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    renames = (("entry_uk", "entry"), ("url_uk", "url"), ("4", "text"))
    notes = []
    for t in parsed.filter_templates():
        if tname(t) != "R:Lexico":
            continue
        before = unicode(t)
        rmparam(t, "lang")
        for old_name, new_name in renames:
            value = getparam(t, old_name)
            if value:
                # Insert under the new name in front of the old parameter,
                # then drop the old one, so the position is preserved.
                t.add(new_name, value, before=old_name)
                rmparam(t, old_name)
        after = unicode(t)
        if before != after:
            notes.append("Remove/rearrange params in {{R:Lexico}}")
            pagemsg("Replaced %s with %s" % (before, after))
    return parsed, notes
def process_page(index, page, save, verbose):
    """Warn when a Russian verb-form page lacks its expected templates.

    Looks for {{inflection of}} and {{head|ru|verb form}}.  If either is
    missing, the "# ..." definition lines of the Russian section are logged
    together with the warning.  Returns None; a page with more than one
    Russian section is skipped with a warning.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)
    found_inflection_of = False
    found_head_verb_form = False
    for t in parsed.filter_templates():
        name = unicode(t.name)
        if name in ["inflection of"]:
            found_inflection_of = True
        if (name == "head" and getparam(t, "1") == "ru" and
                getparam(t, "2") == "verb form"):
            found_head_verb_form = True
    if found_head_verb_form and found_inflection_of:
        return

    # Find definition line
    seen_russian = False
    # Splitting on a capturing group keeps the headers: odd indices hold the
    # "==Lang==" lines and the following even index holds that section's body.
    sections = re.split("(^==[^=]*==\n)", unicode(page.text), 0, re.M)
    for j in xrange(2, len(sections), 2):
        if sections[j-1] != "==Russian==\n":
            continue
        if seen_russian:
            pagemsg("WARNING: Found multiple Russian sections, skipping page")
            return
        seen_russian = True
        # Joined with a literal backslash-n so the log entry stays on one line.
        deflines = r"\n".join(re.findall(r"^(# .*)$", sections[j], re.M))
        if not found_head_verb_form:
            pagemsg("WARNING: No {{head|ru|verb form}}: %s" % deflines)
        if not found_inflection_of:
            pagemsg("WARNING: No 'inflection of': %s" % deflines)
def process_page(index, page, save, verbose):
    """Move 1= of {{ru-IPA}} templates to phon= and optionally save the page.

    Templates that already have phon= are only reported.  The page is saved
    when the text changed and `save` is true; otherwise the would-be save is
    logged.  With `verbose` the full old and new text are logged as well.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    text = unicode(page.text)
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-IPA":
            continue
        before = unicode(t)
        if getparam(t, "phon"):
            pagemsg("phon= already present: %s" % unicode(t))
            continue
        phon = getparam(t, "1")
        pagemsg("Adding phon=: %s" % unicode(t))
        rmparam(t, "1")
        t.add("phon", phon)
        pagemsg("Replaced %s with %s" % (before, unicode(t)))

    newtext = unicode(parsed)
    if newtext == text:
        pagemsg("Skipping")
        return
    if verbose:
        pagemsg("Replacing <<%s>> with <<%s>>" % (text, newtext))
    comment = "Add phon= to ru-IPA templates"
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = newtext
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(page, index):
    """Compare old and new fr-pron module output for each {{fr-IPA}} argument.

    Each pronunciation argument (1= up to the highest non-empty numbered
    argument below 30, with the page title standing in for an empty one) is
    expanded through {{#invoke:fr-pron|show|...|check_new_module=1}}.  A
    result containing " || " is logged as an old/new mismatch; anything else
    is logged as agreeing.  Nothing is modified or returned.
    """
    global args
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        if tname(t) != "fr-IPA":
            continue
        posval = getparam(t, "pos")
        pos_arg = "|pos=%s" % posval if posval else ""
        # Highest numbered argument that is present and non-empty.
        max_arg = 1
        for pronarg in xrange(2, 30):
            if getparam(t, str(pronarg)):
                max_arg = pronarg
        for pronarg in xrange(1, max_arg + 1):
            pronval = getparam(t, str(pronarg)) or pagetitle
            pron = expand_text(
                "{{#invoke:fr-pron|show|%s%s|check_new_module=1}}" %
                (pronval, pos_arg))
            if not pron:
                # Other callers of expand_text in this codebase treat a falsy
                # result as a failed expansion.  Without this guard the
                # substring test below would raise on a non-string.
                pagemsg("WARNING: Error expanding {{fr-IPA|%s%s}}, skipping" %
                        (pronval, pos_arg))
                continue
            if " || " in pron:
                # Split only once so an extra separator in the output cannot
                # break the two-way unpacking.
                pronold, pronnew = pron.split(" || ", 1)
                pagemsg(
                    "WARNING: {{fr-IPA|%s%s}} == %s in old but %s in new" %
                    (pronval, pos_arg, pronold, pronnew))
            else:
                pagemsg("{{fr-IPA|%s%s}} == %s in both old and new" %
                        (pronval, pos_arg, pron))
def process_text_on_page(index, pagetitle, text):
    """Log the rhymes categories produced by the rhymes templates on a page.

    Template names and the optional language include/skip lists are read
    from the global `args` as comma-separated UTF-8 byte strings.  Each
    matching template is expanded and every [[Category:Rhymes:...]] link in
    the expansion is logged.  Nothing is modified or returned.
    """
    global args

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

    notes = []
    parsed = blib.parse_text(text)
    rhymes_templates = args.rhymes_templates.decode("utf-8").split(",")
    skip_lang_codes = (
        args.skip_langs.decode("utf-8").split(",") if args.skip_langs else [])
    include_lang_codes = (
        args.include_langs.decode("utf-8").split(",")
        if args.include_langs else [])

    for t in parsed.filter_templates():
        if tname(t) not in rhymes_templates:
            continue
        langcode = getparam(t, "1")
        # An empty include list means "all languages".
        if include_lang_codes and langcode not in include_lang_codes:
            continue
        if skip_lang_codes and langcode in skip_lang_codes:
            continue
        expanded = expand_text(unicode(t))
        if not expanded:
            continue
        for cattext in re.findall(r"\[\[Category:Rhymes:.*?\]\]", expanded):
            # Strip the surrounding [[ and ]].
            pagemsg("Found rhymes category: %s" % cattext[2:-2])
def process_page(page, index, parsed):
    """Rename alt1=, alt2= and altnomsg= to 1=, 2= and nomsg= in ang-decl-* templates.

    Each value is inserted under its new name in front of the old parameter,
    which is then removed.  Returns (new text, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    renames = (("alt1", "1"), ("alt2", "2"), ("altnomsg", "nomsg"))
    notes = []
    for t in parsed.filter_templates():
        if not tname(t).startswith("ang-decl-"):
            continue
        before = unicode(t)
        for alt_name, main_name in renames:
            value = getparam(t, alt_name)
            if value:
                t.add(main_name, value, before=alt_name)
                rmparam(t, alt_name)
        if unicode(t) != before:
            pagemsg("Replaced %s with %s" % (before, unicode(t)))
            notes.append("move alt param to main param in {{ang-decl-*}}")
    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Swap 1= and 2= of ru-conj templates whose verb type is still in 2=.

    Applies to {{ru-conj}} and {{ru-conj-old}} when 2= holds one of the known
    verb-type codes.  The incoming `parsed` is ignored; the page is
    re-parsed.  Returns (new text, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    verb_types = (
        "pf", "pf-intr", "pf-refl", "pf-impers", "pf-intr-impers",
        "pf-refl-impers", "impf", "impf-intr", "impf-refl", "impf-impers",
        "impf-intr-impers", "impf-refl-impers",
    )
    text = unicode(page.text)
    notes = []
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        before = unicode(t)
        if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
            second = getparam(t, "2")
            if second in verb_types:
                first = getparam(t, "1")
                t.add("2", first)
                t.add("1", second)
                notes.append("move verb type from arg 2 to arg 1")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))
    return unicode(parsed), notes
def process_page_for_fix(page, index, parsed):
    """Convert raw links and "ku" templates on a page to language code "kmr".

    Every [[...]] link is first rewritten to {{l|kmr|...}}.  Then {{l|ku}}
    and {{rhymes nav|ku}} get 1=kmr, and any other template with 1=ku only
    produces a warning.  The incoming `parsed` is ignored.
    Returns (new text, list of change notes).
    """
    pagename = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")
    notes = []
    text = unicode(page.text)
    linked = re.sub(r"\[\[(.*?)\]\]", r"{{l|kmr|\1}}", text)
    if linked != text:
        notes.append("convert raw links to {{l|kmr|...}}")
        text = linked
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        before = unicode(t)
        tn = tname(t)
        if getparam(t, "1") == "ku":
            if tn in ["l", "rhymes nav"]:
                t.add("1", "kmr")
                notes.append("convert {{%s|ku}} to {{%s|kmr}}" % (tn, tn))
            else:
                pagemsg("WARNING: Kurdish-language template of unrecognized name: %s" % unicode(t))
        if before != unicode(t):
            pagemsg("Replaced %s with %s" % (before, unicode(t)))
    text = unicode(parsed)
    return text, notes
def hack_templates(parsed, subsectitle):
    """Replace nocat= with termlang=mul in lang=en quote templates.

    For each template in `quote_templates` that has a non-empty nocat= and
    lang=en, all parameters are removed and re-added with lang= and
    termlang= first, followed by the remaining ones (minus nocat=) in their
    old order.  Uses notes, pagemsg and quote_templates from the enclosing
    scope; modifies `parsed` in place and returns None.
    """
    for t in parsed.filter_templates():
        before = unicode(t)
        if tname(t) not in quote_templates:
            continue
        if not getparam(t, "nocat"):
            continue
        if getparam(t, "lang").strip() != "en":
            continue
        notes.append(
            "convert nocat=1 in lang=en Translingual section to termlang=mul"
        )
        # Fetch all params.
        kept = []
        for param in t.params:
            pname = unicode(param.name)
            if pname.strip() != "nocat":
                kept.append((pname, param.value, param.showkey))
        # Erase all params.
        del t.params[:]
        # Put lang and termlang parameters.  If the template name contains a
        # newline, the new values get a trailing newline too.
        trailer = "\n" if "\n" in unicode(t.name) else ""
        t.add("lang", "en" + trailer, preserve_spacing=False)
        t.add("termlang", "mul" + trailer, preserve_spacing=False)
        # Put remaining parameters in order.
        for pname, pvalue, pshowkey in kept:
            t.add(pname, pvalue, showkey=pshowkey, preserve_spacing=False)
        pagemsg("Replaced <%s> with <%s>" % (before, unicode(t)))
def process_page(index, page):
    """Warn when a Russian Adjective section has no adjective headword template.

    Within the Russian section, looks for {{ru-adj}} or
    {{head|ru|adjective form}}.  A warning is logged if neither is found and
    the section contains "===Adjective===".  Returns None; a page with more
    than one Russian section is skipped with a warning.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def is_adj_headword(t):
        name = unicode(t.name)
        return name == "ru-adj" or (
            name == "head" and getparam(t, "1") == "ru" and
            getparam(t, "2") == "adjective form")

    pagemsg("Processing")
    text = unicode(page.text)
    seen_russian = False
    # Splitting on a capturing group keeps the headers: odd indices hold the
    # "==Lang==" lines and the following even index holds that section's body.
    sections = re.split("(^==[^=]*==\n)", text, 0, re.M)
    for j in xrange(2, len(sections), 2):
        if sections[j-1] != "==Russian==\n":
            continue
        if seen_russian:
            pagemsg("WARNING: Found multiple Russian sections, skipping page")
            return
        seen_russian = True
        body = sections[j]
        templates = blib.parse_text(body).filter_templates()
        has_headword = any(is_adj_headword(t) for t in templates)
        if not has_headword and "===Adjective===" in body:
            pagemsg("WARNING: Missing adj headword template")
def process_page(page, index, parsed):
    """Fold suffix=ся of Russian adjective declension templates into the lemma.

    Only pages whose title ends in "ся" are processed (others return None).
    For {{ru-decl-adj}} and {{ru-adj-old}} with suffix=ся, the suffix is
    appended before every comma and at the end of 1=, and suffix= is removed.
    The incoming `parsed` is ignored.  Returns (new text, list of notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    if not pagetitle.endswith(u"ся"):
        return
    text = unicode(page.text)
    notes = []
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        before = unicode(t)
        is_adj_decl = unicode(t.name) in ["ru-decl-adj", "ru-adj-old"]
        if is_adj_decl and getparam(t, "suffix") == u"ся":
            # Comma-separated alternatives each receive the suffix.
            with_inner = re.sub(",", u"ся,", getparam(t, "1"))
            t.add("1", re.sub("$", u"ся", with_inner))
            rmparam(t, "suffix")
            notes.append(u"move suffix=ся to lemma")
        after = unicode(t)
        if before != after:
            pagemsg("Replaced %s with %s" % (before, after))
    return unicode(parsed), notes
def process_page(index, page, save, verbose):
    """Report past passive participles of perfective Russian verbs on a page.

    For each {{ru-conj}} / {{ru-conj-old}} whose 1= starts with "pf", the
    template is re-expanded as {{ru-generate-verb-forms}} (with old=y for the
    -old variant) to check that it generates cleanly.  The participle
    parameters (past_pasv_part*, ppp*) of the template are then logged.
    Nothing is modified or returned.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        # The template name must be read from `t`.  The comparison used to
        # test a bare `tname`, which is never assigned in this function, so
        # the {{ru-conj}} branch was never taken and the {{ru-conj-old}}
        # substitution left {{ru-conj}} calls unchanged.
        tn = unicode(t.name)
        if tn not in ["ru-conj", "ru-conj-old"]:
            continue
        if not getparam(t, "1").startswith("pf"):
            continue
        if tn == "ru-conj":
            tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                              unicode(t))
        else:
            tempcall = re.sub(r"\{\{ru-conj-old",
                              "{{ru-generate-verb-forms|old=y", unicode(t))
        result = expand_text(tempcall)
        if not result:
            pagemsg("WARNING: Error generating forms, skipping")
            continue
        # NOTE(review): the generated forms are parsed but never consulted;
        # the loop below reads the parameters of `t` itself -- confirm that
        # this is intended.
        args = rulib.split_generate_args(result)
        for base in ["past_pasv_part", "ppp"]:
            for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
                val = getparam(t, base + i)
                if val and val != "-":
                    # Drop everything from "//" onward in the value.
                    val = re.sub("//.*", "", val)
                    pagemsg("Found perfective past passive participle: %s" % val)
def process_text_on_page(index, pagename, text):
    """Normalize {{RQ:Buk Baibel}} templates.

    Maps 1= through `book_map` when it has an entry there, and renames a
    non-empty 4= to passage= at the same position.
    Returns (new text, list of change notes).
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")
    notes = []
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        before = unicode(t)
        if tn == "RQ:Buk Baibel":
            book = getparam(t, "1")
            if book in book_map:
                mapped = book_map[book]
                t.add("1", mapped)
                notes.append("convert '%s' to '%s' in 1= in {{%s}}" %
                             (book, mapped, tn))
            passage = getparam(t, "4")
            if passage:
                t.add("passage", passage, before="4")
                rmparam(t, "4")
                notes.append("4= -> passage= in {{%s}}" % tn)
        if unicode(t) != before:
            pagemsg("Replaced %s with %s" % (before, unicode(t)))
    return unicode(parsed), notes
def process_text_on_page(index, pagename, text):
    """Convert {{head|la|<pos>}} into the Latin template given by `pos_to_template`.

    Templates with an unrecognized part of speech, or with 3= or head=, are
    skipped with a warning.  Converted templates get 1=<pagename> and a
    FIXME=1 marker.  Returns (new text, list of change notes).
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")
    notes = []
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        if not (tname(t) == "head" and getparam(t, "1") == "la"):
            continue
        pos = getparam(t, "2")
        if pos not in pos_to_template:
            pagemsg("WARNING: Saw unrecognized part of speech %s: %s" % (pos, unicode(t)))
            continue
        if getparam(t, "3") or getparam(t, "head"):
            pagemsg("WARNING: Saw 3= or head=: %s" % unicode(t))
            continue
        before = unicode(t)
        # 1= (previously the language code) now carries the page name.
        t.add("1", pagename)
        blib.set_template_name(t, pos_to_template[pos])
        rmparam(t, "2")
        t.add("FIXME", "1")
        pagemsg("Replaced %s with %s" % (before, unicode(t)))
        notes.append("replace {{head|la|%s}} with {{%s}}" % (pos, tname(t)))
    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Update language codes and drop sort= on {{rfdef}} templates.

    Templates carrying lang= are reported and skipped. A 1= code found in
    `langs_to_convert` is replaced by its mapped code; if the resulting
    code is in `langs_to_remove_sort`, any sort= parameter is removed.
    Returns a tuple of the re-serialized `parsed` text and the change notes.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        if tname(t) != "rfdef":
            continue
        if getparam(t, "lang"):
            pagemsg("WARNING: has lang=, skipping: %s" % unicode(t))
            continue

        code = getparam(t, "1")
        if code in langs_to_convert:
            converted = langs_to_convert[code]
            t.add("1", converted)
            notes.append("convert {{rfdef|%s}} to {{rfdef|%s}}" % (code, converted))
            # The sort= check below applies to the converted code.
            code = converted
        if code in langs_to_remove_sort and t.has("sort"):
            rmparam(t, "sort")
            notes.append(
                "remove sort= from {{rfdef|%s}}, now auto-computed" % code)

        if unicode(t) != before:
            pagemsg("Replaced <%s> with <%s>" % (before, unicode(t)))

    return unicode(parsed), notes
def process_page(index, page, save, verbose):
    """Log explicit perfective past passive participles on Russian verb pages.

    For every {{ru-conj}} / {{ru-conj-old}} template on `page` whose 1=
    starts with "pf", the equivalent {{ru-generate-verb-forms}} call is
    expanded (the template is skipped if the expansion yields nothing), and
    each non-empty, non-"-" past_pasv_part*/ppp* override on the template is
    logged.

    `index` is only used to prefix log messages; `verbose` is forwarded to
    blib.expand_text(); `save` is not used by this function.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")
    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        name = unicode(t.name)
        if name not in ["ru-conj", "ru-conj-old"]:
            continue
        if not getparam(t, "1").startswith("pf"):
            continue
        # Compare the template's name, not the `tname` helper itself: the
        # old `tname == "ru-conj"` test was always False, so {{ru-conj}}
        # calls were never rewritten to {{ru-generate-verb-forms}}.
        if name == "ru-conj":
            tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                              unicode(t))
        else:
            tempcall = re.sub(r"\{\{ru-conj-old",
                              "{{ru-generate-verb-forms|old=y", unicode(t))
        result = expand_text(tempcall)
        if not result:
            pagemsg("WARNING: Error generating forms, skipping")
            continue
        # The generated forms are parsed but not consulted below; only the
        # explicit overrides on the template itself are reported.
        args = blib.split_generate_args(result)
        for base in ["past_pasv_part", "ppp"]:
            for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
                val = getparam(t, base + i)
                if val and val != "-":
                    # Drop anything from "//" onwards before reporting.
                    val = re.sub("//.*", "", val)
                    pagemsg(
                        "Found perfective past passive participle: %s" % val)
def process_page(templates, index, page, save=False, verbose=False):
    """Move the plural stem one positional slot to the left.

    Applies to templates on `page` whose name is in `templates`. When
    arg1_is_stress() accepts 1=, the stem moves from 5= to 4=; otherwise
    from 4= to 3=. Templates containing an unnamed "or", "-", "_" or
    "join:..." argument are reported and skipped. The page is saved only
    when `save` is true; otherwise the would-be save is logged.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    if not page.exists():
        pagemsg("WARNING: Page doesn't exist")
        return

    parsed = blib.parse(page)
    should_save = False
    for t in parsed.filter_templates():
        if unicode(t.name) not in templates:
            continue
        origt = unicode(t)

        # Punt if multi-arg-set, can't handle yet
        unsupported = True
        for param in t.params:
            if param.showkey:
                continue
            val = unicode(param.value)
            if val == "or":
                pagemsg("WARNING: Can't handle multi-decl templates: %s" % unicode(t))
                break
            if val in ("-", "_") or val.startswith("join:"):
                pagemsg("WARNING: Can't handle multi-word templates: %s" % unicode(t))
                break
        else:
            unsupported = False
        if unsupported:
            continue

        if arg1_is_stress(getparam(t, "1")):
            oldplarg, newplarg = "5", "4"
        else:
            oldplarg, newplarg = "4", "3"

        plstem = getparam(t, oldplarg)
        if not plstem:
            continue
        if getparam(t, newplarg):
            pagemsg("WARNING: Something wrong, found args in both positions %s and %s: %s" %
                    (newplarg, oldplarg, unicode(t)))
            continue
        rmparam(t, oldplarg)
        t.add(newplarg, plstem)
        should_save = True
        pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    if not should_save:
        return
    comment = "Move plstem from 5th/4th argument to 4th/3rd"
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = unicode(parsed)
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_template(pagetitle, index, template, ruparam, trparam, output_line, find_accents, verbose):
    """Try to auto-accent one Cyrillic parameter of `template`.

    `ruparam` names the parameter to read, or is the literal "page title",
    or is a two-element list [read_from, save_to]. `trparam` optionally
    names the transliteration parameter. When `find_accents` is true, the
    value is looked up through find_accented(); a result that differs only
    in accents is written back (with the translit when permitted).
    `output_line` receives short status strings.

    Returns a one-element list holding the change note when the template
    was modified, otherwise False. {{head}} templates are never touched.
    """
    origt = unicode(template)
    saveparam = ruparam

    def pagemsg(text):
        msg("Page %s %s: %s" % (index, pagetitle, text))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, semi_verbose)

    if semi_verbose:
        pagemsg("Processing template: %s" % unicode(template))
    if unicode(template.name) == "head":
        # Skip {{head}}. We don't want to mess with headwords.
        return False

    if isinstance(ruparam, list):
        ruparam, saveparam = ruparam
    val = pagetitle if ruparam == "page title" else getparam(template, ruparam)
    if trparam:
        valtr = getparam(template, trparam)
    else:
        valtr = ""

    changed = False
    if find_accents:
        newval, newtr = find_accented(val, valtr, verbose, pagemsg, expand_text, origt)
        if newval != val or newtr != valtr:
            if ru.remove_accents(newval) == ru.remove_accents(val):
                changed = True
                addparam(template, saveparam, newval)
                if newtr:
                    if not trparam:
                        pagemsg("WARNING: Unable to change translit to %s because no translit param available (Cyrillic param %s): %s" %
                                (newtr, saveparam, origt))
                    elif unicode(template.name) in ["ru-ux"]:
                        pagemsg("WARNING: Not changing or adding translit param %s=%s to ru-ux: origt=%s" % (
                            trparam, newtr, origt))
                    else:
                        if valtr and valtr != newtr:
                            pagemsg("WARNING: Changed translit param %s from %s to %s: origt=%s" %
                                    (trparam, valtr, newtr, origt))
                        if not valtr:
                            pagemsg("NOTE: Added translit param %s=%s to template: origt=%s" %
                                    (trparam, newtr, origt))
                        addparam(template, trparam, newtr)
                elif valtr:
                    pagemsg("WARNING: Template has translit %s but lookup result has none, leaving translit alone: origt=%s" %
                            (valtr, origt))
                if check_need_accent(newval):
                    output_line("Need accents (changed)")
                else:
                    output_line("Found accents")
            else:
                pagemsg("WARNING: Accented page %s changed from %s in more than just accents, not changing" % (newval, val))

    if not changed:
        if check_need_accent(val):
            output_line("Need accents")
        return False

    pagemsg("Replaced %s with %s" % (origt, unicode(template)))
    return ["auto-accent %s%s" % (newval, "//%s" % newtr if newtr else "")]
def process_page(index, page, save, verbose):
    """Disabled migration of {{ru-conj}} type 6a / 7b positional arguments.

    The function logs a warning and returns immediately. Everything after
    the early return is unreachable and kept only as reference: it moved
    6= to 4= for type 6a and 7= to 6= for type 7b, padding the intervening
    positional parameters with blanks.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []

    def pad_blank(templ, stop):
        # Ensure positional params 1..stop-1 exist so the moved value
        # lands in the intended slot.
        for n in xrange(1, stop):
            if not templ.has(str(n)):
                templ.add(str(n), "")

    for t in parsed.filter_templates():
        origt = unicode(t)
        if unicode(t.name) in ["ru-conj"]:
            conjtype = getparam(t, "1")
            if conjtype.startswith("6a"):
                param6 = getparam(t, "6")
                if param6:
                    rmparam(t, "6")
                    if not getparam(t, "5"):
                        rmparam(t, "5")
                    pad_blank(t, 4)
                    t.add("4", param6)
                    notes.append("move type 6a arg6 -> arg4")
            if conjtype.startswith("7b"):
                param7 = getparam(t, "7")
                if param7:
                    rmparam(t, "7")
                    pad_blank(t, 6)
                    t.add("6", param7)
                    notes.append("move type 7b arg7 -> arg6")
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose):
    """Disabled check of past-tense overrides on {{ru-conj-7a}} / {{ru-conj-7b}}.

    The function logs a warning and returns immediately. Everything after
    the early return is unreachable and kept only as reference: it compared
    the past_m/past_f/past_n/past_pl overrides against forms derived from
    4= and reported whether they could be dropped.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        if unicode(t.name) in ["ru-conj-7a", "ru-conj-7b"]:
            past_stem = getparam(t, "4")
            vowel_end = re.search(u"[аэыоуяеиёю́]$", past_stem)
            past_m = getparam(t, "past_m")
            past_f = getparam(t, "past_f")
            past_n = getparam(t, "past_n")
            past_pl = getparam(t, "past_pl")
            if past_m or past_f or past_n or past_pl:
                upast_stem = ru.make_unstressed(past_stem)
                if vowel_end:
                    expected_past_m = past_stem + u"л"
                else:
                    expected_past_m = past_stem + ""
                expected_past_f = upast_stem + u"ла́"
                expected_past_n = upast_stem + u"ло́"
                expected_past_pl = upast_stem + u"ли́"
                # past_m may be absent; the other three must match exactly.
                masc_ok = not past_m or expected_past_m == past_m
                if (masc_ok and expected_past_f == past_f and
                        expected_past_n == past_n and
                        expected_past_pl == past_pl):
                    msg("Would remove past overrides and add arg5=b")
                else:
                    msg("WARNING: Remaining past overrides: past_m=%s, past_f=%s, past_n=%s, past_pl=%s, expected_past_m=%s, expected_past_f=%s, expected_past_n=%s, expected_past_pl=%s" %
                        (past_m, past_f, past_n, past_pl, expected_past_m, expected_past_f, expected_past_n, expected_past_pl))
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose):
    """Strip g=/g2=/g3=/g4= from Russian adjective-form headwords.

    Only the ==Russian== section is touched. Pages with a colon in the
    title, or with more than one Russian section, are reported and skipped.
    The page is saved only when `save` is true; otherwise the would-be save
    is logged.
    """
    pagetitle = unicode(page.title())
    subpagetitle = re.sub("^.*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    if ":" in pagetitle:
        pagemsg("WARNING: Colon in page title, skipping page")
        return

    text = unicode(page.text)
    notes = []
    foundrussian = False
    # Odd indices hold the "==Lang==\n" headers, even indices the bodies.
    sections = re.split("(^==[^=]*==\n)", text, 0, re.M)
    for j in xrange(2, len(sections), 2):
        if sections[j-1] != "==Russian==\n":
            continue
        if foundrussian:
            pagemsg("WARNING: Found multiple Russian sections, skipping page")
            return
        foundrussian = True

        # Remove gender from adjective forms
        parsed = blib.parse_text(sections[j])
        for t in parsed.filter_templates():
            is_adj_form = (unicode(t.name) == "head" and
                           getparam(t, "1") == "ru" and
                           getparam(t, "2") == "adjective form")
            if not is_adj_form:
                continue
            origt = unicode(t)
            for gparam in ["g", "g2", "g3", "g4"]:
                rmparam(t, gparam)
            newt = unicode(t)
            if origt != newt:
                pagemsg("Replaced %s with %s" % (origt, newt))
                notes.append("remove gender from adjective forms")
        sections[j] = unicode(parsed)

    new_text = "".join(sections)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(blib.group_notes(notes))
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose):
    """Fold past_m= into positional arguments of class-8 {{ru-conj}} templates.

    For templates whose 2= starts with "8a" or "8b": a past_m equal to the
    stem (3=) is dropped; for plain 8b (not "8b/...") where the stem is the
    unstressed past_m, past_m replaces the stem; otherwise past_m moves to
    5=. Templates containing an "or" argument are reported and skipped. The
    page is saved only when `save` is true.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        param2 = getparam(t, "2")
        if unicode(t.name) == "ru-conj" and re.search(r"^8[ab]", param2):
            if any(unicode(x.value) == "or" for x in t.params):
                pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
                continue
            past_m = getparam(t, "past_m")
            if past_m:
                rmparam(t, "past_m")
                stem = getparam(t, "3")
                plain_8b = param2.startswith("8b") and not param2.startswith("8b/")
                if stem == past_m:
                    pagemsg("Stem %s and past_m same" % stem)
                    notes.append("remove redundant past_m %s" % past_m)
                elif plain_8b and ru.make_unstressed(past_m) == stem:
                    pagemsg("Class 8b/b and stem %s is unstressed version of past_m %s, replacing stem with past_m" % (
                        stem, past_m))
                    t.add("3", past_m)
                    notes.append("moving past_m %s to arg 3" % past_m)
                else:
                    pagemsg("Stem %s and past_m %s are different, putting past_m in param 5" % (
                        stem, past_m))
                    t.add("5", past_m)
                    notes.append("moving past_m %s to arg 5" % past_m)
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def vocalize_param(pagetitle, index, template, param, paramtr):
    """Vocalize `param` of `template` using the Latin text in `paramtr`.

    Returns False when `param` is empty and True when `paramtr` is empty.
    Otherwise returns whatever do_vocalize_param() produced; a truthy
    result is also written back into `param` and the replacement is logged.
    """
    arabic = getparam(template, param)
    latin = getparam(template, paramtr)
    if not arabic:
        return False
    if not latin:
        return True

    vocalized = do_vocalize_param(pagetitle, index, template, param, arabic, latin)
    if vocalized:
        # Capture the template text before it is modified, for the log line.
        oldtempl = "%s" % unicode(template)
        addparam(template, param, vocalized)
        msg("Page %s %s: Replaced %s with %s" %
            (index, pagetitle, oldtempl, unicode(template)))
    return vocalized
def fix_page_params(t):
    """Tidy page=/pages= on template `t`; return True if `t` changed.

    A leading "p." / "pp." is stripped from both parameters (keeping
    leading whitespace). A purely numeric pages= is then moved to page=,
    and a page= consisting of digits followed by a dash is moved to pages=.
    """
    before = unicode(t)
    for param in ["page", "pages"]:
        pageval = getparam(t, param)
        if not re.search(r"^\s*pp?\.\s*", pageval):
            continue
        pageval = re.sub(r"^(\s*)pp?\.\s*", r"\1", pageval)
        t.add(param, pageval)
        notes.append("remove p(p). from %s=" % param)
        pagemsg("remove p(p). from %s=" % param)

    pages_val = getparam(t, "pages").strip()
    if re.search(r"^[0-9]+$", pages_val):
        move_param(t, "pages", "page")

    # Re-read page= only now: the move above may have just populated it.
    page_val = getparam(t, "page").strip()
    if re.search(r"^[0-9]+[-–—]$", page_val):
        move_param(t, "page", "pages")

    return before != unicode(t)
def process_page(index, page, save, verbose):
    """Remove the explicit "Russian terms with audio links" category.

    The category link is removed only when the page contains an {{audio}}
    template with lang=ru. The page is saved only when `save` is true;
    otherwise the would-be save is logged.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)

    found_audio = any(
        unicode(t.name) == "audio" and getparam(t, "lang") == "ru"
        for t in parsed.filter_templates()
    )
    if not found_audio:
        return

    # The category link and its surrounding newlines collapse to one
    # blank line.
    new_text = re.sub(r"\n*\[\[Category:Russian terms with audio links]]\n*", "\n\n", text)
    if new_text == text:
        return
    comment = "Remove redundant [[:Category:Russian terms with audio links]]"
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def process_new_style_headword(htemp):
    """Split the positional args of `htemp` into arg sets and process each.

    Arg sets are delimited by "or", "_", "-" or a "join:..." argument.
    Each completed set (possibly empty) is handed to process_arg_set(),
    including the final one after the last numbered parameter.
    """
    # Split out the arg sets in the declension and check the lemma of each
    # one, taking care to handle cases where there is no lemma (it would
    # default to the page name).
    highest_numbered_param = 0
    for p in htemp.params:
        pname = unicode(p.name)
        if re.search("^[0-9]+$", pname):
            highest_numbered_param = max(highest_numbered_param, int(pname))

    # Now split based on arg sets. The extra iteration past the highest
    # parameter flushes the last set.
    sentinel = highest_numbered_param + 1
    arg_set = []
    for i in xrange(1, sentinel + 1):
        val = getparam(htemp, str(i))
        is_separator = (i == sentinel or val in ["or", "_", "-"] or
                        re.search("^join:", val))
        if is_separator:
            process_arg_set(arg_set)
            arg_set = []
        else:
            arg_set.append(val)
# process_page(index, page, save, verbose, direc): walks the {{ru-noun+}} and
# {{ru-noun-table}} templates on `page`, may set 3= to "-" ("make
# indeclinable"), and saves (or logs a would-be save) with a comment starting
# "Add inanimacy to neuters".
# NOTE(review): this body looks unfinished -- it contains a bare `FIXME` name
# next to the single-param check, the nested helper `frob_gender_param` is
# never called, and `subpagetitle` and `direc` are never read. Confirm the
# intended logic before running.
# NOTE(review): `fix_indeclinable` is not defined inside this function;
# presumably a module-level flag -- verify.
def process_page(index, page, save, verbose, direc): pagetitle = unicode(page.title()) subpagetitle = re.sub(".*:", "", pagetitle) def pagemsg(txt): msg("Page %s %s: %s" % (index, pagetitle, txt)) pagemsg("Processing") notes = [] text = unicode(page.text) parsed = blib.parse(page) def frob_gender_param(t, param): val = getparam(t, param) if val == "n": t.add(param, "n-in") elif val == "n-p": t.add(param, "n-in-p") for t in parsed.filter_templates(): if unicode(t.name) in ["ru-noun+", "ru-noun-table"]: origt = unicode(t) for param in t.params: if unicode(param.name) != "1": pagemsg("WARNING: Found other than a single param in template, skipping: %s" % unicode(t)) return FIXME if origt != unicode(t): param3 = getparam(t, "3") if param3 != "-": if fix_indeclinable: if param3: pagemsg("WARNING: Can't make indeclinable, has genitive singular given: %s" % origt) return else: t.add("3", "-") notes.append("make indeclinable") pagemsg("Making indeclinable: %s" % unicode(t)) else: pagemsg("WARNING: Would add inanimacy to neuter, but isn't marked as indeclinable: %s" % origt) return pagemsg("Replacing %s with %s" % (origt, unicode(t))) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) if notes: comment = "Add inanimacy to neuters (%s)" % "; ".join(notes) else: comment = "Add inanimacy to neuters" if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment)
def infer_one_page_decls_1(page, index, text):
    """Rewrite adjective declension templates in `text` to the inferred form.

    `text` is parsed wikicode. For each template named in `decl_templates`,
    infer_decl() is asked for a new argument list. When it returns nothing,
    the stem and declension are merely combined and trailing blank
    positional arguments removed, and the result is validated with
    compare_results(). Otherwise the old positional and short_* arguments
    are replaced wholesale by the inferred ones.

    Returns a tuple of `text` and the save comment, or (None, None) when
    compare_results() rejects a rewrite.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))

    for tempname in decl_templates:
        for t in text.filter_templates():
            if unicode(t.name).strip() != tempname:
                continue
            orig_template = unicode(t)
            args = infer_decl(t, pagemsg)
            if not args:
                # At least combine stem and declension, blanking decl when
                # possible.
                stem, decl = combine_stem(getparam(t, "1"), getparam(t, "2"))
                t.add("1", stem)
                t.add("2", decl)
                # Remove any trailing blank arguments.
                for i in xrange(15, 0, -1):
                    if getparam(t, i):
                        break
                    rmparam(t, i)
                new_template = unicode(t)
                if orig_template != new_template:
                    if not compare_results(orig_template, new_template, pagemsg):
                        return None, None
            else:
                for i in xrange(15, 0, -1):
                    rmparam(t, i)
                for short in ["short_m", "short_f", "short_n", "short_p"]:
                    rmparam(t, short)
                t.name = tempname
                i = 1
                for arg in args:
                    if "=" in arg:
                        # Split on the first "=" only, so a value that
                        # itself contains "=" no longer breaks unpacking.
                        name, value = arg.split("=", 1)
                        t.add(name, value)
                    else:
                        t.add(i, arg)
                        i += 1
            new_template = unicode(t)
            if orig_template != new_template:
                if verbose:
                    pagemsg("Replacing %s with %s" % (orig_template, new_template))

    return text, "Convert adj decl to new form and infer short-accent pattern"
def process_page(index, page, save, verbose):
    """Disabled conversion of {{ru-conj}} class 6 to the 6° notation.

    The function logs a warning and returns immediately. Everything after
    the early return is unreachable and kept only as reference: it turned
    6a/6c with no_iotation= into 6°a/6°c (dropping no_iotation=) and every
    6b into 6°b.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        param1 = getparam(t, "1")
        if unicode(t.name) in ["ru-conj"]:
            if re.search(r"^6[ac]", param1):
                if getparam(t, "no_iotation"):
                    rmparam(t, "no_iotation")
                    if param1.startswith("6a"):
                        notes.append(u"6a + no_iotation -> 6°a")
                    else:
                        notes.append(u"6c + no_iotation -> 6°c")
                    t.add("1", re.sub("^6", u"6°", param1))
            elif re.search(r"^6b", param1):
                notes.append(u"6b -> 6°b")
                t.add("1", re.sub("^6", u"6°", param1))
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def check_for_al(param):
    """Rewrite headword parameter `param` with remove_al() applied.

    Does nothing when the parameter is empty. If the value contains any of
    the characters "[", "]" or "|", that is logged and noted, and the
    links are stripped with remove_links() before remove_al() is applied.
    """
    param = remove_links(param)
    value = getparam(headword_template, param)
    if not value:
        return
    if any(ch in value for ch in ('[', ']', '|')):
        pagemsg("Param %s value %s has link in it" % (param, value))
        add_note("removed links from %s" % param)
        value = remove_links(value)
    putp(param, remove_al(value))
def process_page(index, page, save, verbose):
    """Replace blank participle overrides with "-" on class 7a/7b conjugations.

    For {{ru-conj}} / {{ru-conj-old}} templates whose 2= is "7a" or "7b",
    a past_adv_part_short= or past_actv_part= that is present but empty is
    set to "-". Templates containing an "or" argument are reported and
    skipped. The page is saved only when `save` is true; a warning is
    logged when nothing was changed.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if unicode(t.name) not in ["ru-conj", "ru-conj-old"]:
            continue
        if getparam(t, "2") not in ["7a", "7b"]:
            continue
        if any(unicode(x.value) == "or" for x in t.params):
            pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
            continue

        if t.has("past_adv_part_short") and getparam(t, "past_adv_part_short") == "":
            notes.append("set past_adv_part_short=-")
            before = unicode(t)
            t.add("past_adv_part_short", "-")
            pagemsg("Replacing %s with %s" % (before, unicode(t)))

        if t.has("past_actv_part") and getparam(t, "past_actv_part") == "":
            notes.append("set past_actv_part=-")
            before = unicode(t)
            t.add("past_actv_part", "-")
            pagemsg("Replacing %s with %s" % (before, unicode(t)))

    new_text = unicode(parsed)
    if new_text != text:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        assert notes
        comment = "; ".join(notes)
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)

    if not notes:
        pagemsg("WARNING: No changes")
def get_form_class(k):
    """Return the verb form class declared in etymology section `k`.

    Looks at the 1= parameter of every {{ar-verb}} / {{ar-verb-form}}
    template in `etymologies[k]` and returns the value from the last such
    template (None when there is none). A warning is logged when two
    templates in the same section give different non-empty classes.
    """
    formclass = None
    # Index by the argument `k`: the body previously read `etymologies[j]`
    # from the enclosing scope and ignored its own parameter.
    parsed = blib.parse_text(etymologies[k])
    for t in parsed.filter_templates():
        if t.name in ["ar-verb", "ar-verb-form"]:
            newformclass = getparam(t, "1")
            if formclass and newformclass and formclass != newformclass:
                pagemsg("WARNING: Something wrong: Two different verb form classes in same etymology: %s != %s" %
                        (formclass, newformclass))
            formclass = newformclass
    return formclass
def process_page(index, page, save, verbose):
    """Convert head= to 1= on {{ru-phrase}} templates.

    A template that has tr= triggers a warning; one that has both head=
    and 1= is reported and left alone. Otherwise head= is replaced by 1=,
    and a non-empty tr= is removed and re-added after it. The page is
    saved only when `save` is true; otherwise the would-be save is logged.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-phrase":
            continue
        if t.has("tr"):
            pagemsg("WARNING: Has tr=: %s" % unicode(t))
        if not t.has("head"):
            continue
        if t.has("1"):
            pagemsg("WARNING: Has both head= and 1=: %s" % unicode(t))
            continue

        notes.append("ru-phrase: convert head= to 1=")
        before = unicode(t)
        head = getparam(t, "head")
        rmparam(t, "head")
        tr = getparam(t, "tr")
        rmparam(t, "tr")
        t.add("1", head)
        if tr:
            t.add("tr", tr)
        pagemsg("Replacing %s with %s" % (before, unicode(t)))

    new_text = unicode(parsed)
    if new_text == text:
        return
    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)
def fetch_numbered_params(t):
    """Return parameters 1= through 9= of `t` as a list.

    Missing or empty parameters become "", and trailing empty entries are
    trimmed, so the list ends at the last non-empty positional parameter.
    """
    values = [getparam(t, str(n)) or "" for n in xrange(1, 10)]
    while values and not values[-1]:
        values.pop()
    return values