def fix_smp(save, verbose, startFrom, upTo):
  """Change pl=smp (and pl2=, pl3=, ...) to pl=sp in Arabic declension templates.

  Heads ending in TAM (per the warning text, a feminine ending) are skipped
  with a warning, since switching them to "sp" would be wrong.

  BUG FIX vs. original: blib.references yields (index, page) -- every other
  reference loop in this file (including the parallel copy of this very
  function) unpacks it in that order -- but this copy had the unpacking
  swapped as (page, index).  Also dropped a copy-pasted comment about
  cap=/lc= that belongs to fix_tool_place_noun, not here.
  """
  for template in arabic_decl_templates:
    def fix_one_page_smp(page, index, text):
      pagetitle = page.title()
      for t in text.filter_templates():
        head = reorder_shadda(getparam(t, "1"))
        if t.name.startswith("ar-decl-"):
          # Walk pl=, then pl2=, pl3=, ... until an empty value is seen.
          param = "pl"
          pl = getparam(t, param)
          i = 2
          while pl:
            if pl == "smp":
              if head.endswith(TAM):
                msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (
                  index, pagetitle, param, head, t.name))
              else:
                msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (
                  index, pagetitle, param, param, t.name))
                addparam(t, param, "sp")
            param = "pl%s" % i
            pl = getparam(t, param)
            i += 1
      changelog = "Change pl=smp to pl=sp"
      msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
      return text, changelog
    for index, page in blib.references("Template:" + template, startFrom, upTo):
      blib.do_edit(page, index, fix_one_page_smp, save=save, verbose=verbose)
def fix_smp(save, verbose, startFrom, upTo):
  """Change pl=smp (and pl2=, pl3=, ...) to pl=sp in Arabic declension templates.

  Heads ending in TAM (per the warning text, a feminine ending) are left
  alone with a warning instead of being changed.
  """
  for template in arabic_decl_templates:
    # NOTE(review): the original comment here ("If cap= is present, remove
    # it; else, add lc=") was copy-pasted from fix_tool_place_noun and does
    # not describe this function.
    def fix_one_page_smp(page, index, text):
      pagetitle = page.title()
      for t in text.filter_templates():
        head = reorder_shadda(getparam(t, "1"))
        if t.name.startswith("ar-decl-"):
          # Walk pl=, then pl2=, pl3=, ... until an empty value is seen.
          param = "pl"
          pl = getparam(t, param)
          i = 2
          while pl:
            if pl == "smp":
              if head.endswith(TAM):
                msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (index, pagetitle, param, head, t.name))
              else:
                msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (index, pagetitle, param, param, t.name))
                addparam(t, param, "sp")
            param = "pl%s" % i
            pl = getparam(t, param)
            i += 1
      changelog = "Change pl=smp to pl=sp"
      # The changelog is logged unconditionally, even when nothing changed.
      msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
      return text, changelog
    for index, page in blib.references("Template:" + template, startFrom, upTo):
      blib.do_edit(page, index, fix_one_page_smp, save=save, verbose=verbose)
def do_nouns(poses, headtempls, save, startFrom, upTo):
  """Remove i3rab from all parameters of the given headword templates.

  poses: list of part-of-speech names; pages are drawn from the category
    "Arabic <pos>s" for each.
  headtempls: names of headword templates whose parameters get cleaned.
  """
  def do_one_page_noun(page, index, text):
    pagename = page.title()
    nouncount = 0
    nounids = []
    for template in text.filter_templates():
      if template.name in headtempls:
        nouncount += 1
        params_done = []
        entry = getparam(template, "1")
        for param in template.params:
          value = param.value
          newvalue = remove_i3rab(pagename, index, entry, unicode(value))
          if newvalue != value:
            param.value = newvalue
            params_done.append(unicode(param.name))
        if params_done:
          nounids.append("#%s %s %s (%s)" % (nouncount, template.name, entry, ", ".join(params_done)))
    return text, "Remove i3rab from params in %s" % (
      '; '.join(nounids))
  for pos in poses:
    for index, page in blib.cat_articles("Arabic %ss" % pos.lower(), startFrom, upTo):
      # NOTE(review): `verbose` is not a parameter of this function;
      # presumably a module-level global -- TODO confirm.
      blib.do_edit(page, index, do_one_page_noun, save=save, verbose=verbose)
def do_pages(createfn, iterfn=iter_pages):
  """Create pages from (pagename, text, changelog) tuples produced by iterfn(createfn).

  Skips pages that already exist.  In offline mode (params.offline) only
  logs the would-be text and changelog.

  NOTE(review): relies on enclosing-scope/module globals `startFrom`,
  `upTo`, `params` and `site` -- confirm they are defined at module level.
  """
  pages = iterfn(createfn)
  # iter_pages yields (item, index); items are keyed by page name.
  for current, index in blib.iter_pages(pages, startFrom, upTo, key=lambda x: x[0]):
    pagename, text, changelog = current
    pagetitle = remove_diacritics(pagename)
    if params.offline:
      msg("Text for %s: [[%s]]" % (pagename, text))
      msg("Changelog = %s" % changelog)
    else:
      page = pywikibot.Page(site, pagetitle)
      if page.exists():
        msg("Page %s %s: WARNING, page already exists, skipping" % (index, pagename))
      else:
        # do_edit callback simply supplies the precomputed text/changelog.
        def save_text(page, index, parsed):
          return text, changelog
        blib.do_edit(page, index, save_text, save=params.save, verbose=params.verbose)
def rewrite_pages(refrom, reto, refs, cat, pages, pagefile, pagetitle_sub,
    comment, filter_pages, save, verbose, startFrom, upTo):
  """Apply paired regex substitutions (refrom[i] -> reto[i]) to a set of pages.

  The page set comes from, in priority order: an explicit `pages` list, a
  `pagefile` of page names, template references (`refs`), or a category
  (`cat`).  `pagetitle_sub`, if given, is replaced in each pattern by the
  (escaped) page title.  Pages not matching `filter_pages` are skipped.

  BUG FIX vs. original: the pages/pagefile branches yielded (Page, index)
  tuples while blib.references / blib.cat_articles yield (index, page) --
  as unpacked everywhere else in this file -- yet the final loop unpacked
  a single fixed order, so two of the four branches were swapped.  All
  branches now yield (index, page).  Also materialize zip() so the
  changelog join still works if zip returns an iterator (Python 3).
  """
  def rewrite_one_page(page, index, text):
    #blib.msg("From: [[%s]], To: [[%s]]" % (refrom, reto))
    text = unicode(text)
    text = reorder_shadda(text)
    zipped_fromto = list(zip(refrom, reto))
    for fromval, toval in zipped_fromto:
      if pagetitle_sub:
        pagetitle = unicode(page.title())
        # The pattern side must be regex-escaped; the replacement side not.
        fromval = fromval.replace(pagetitle_sub, re.escape(pagetitle))
        toval = toval.replace(pagetitle_sub, pagetitle)
      text = re.sub(fromval, toval, text)
    return text, comment or "replace %s" % (", ".join("%s -> %s" % (f, t) for f, t in zipped_fromto))
  if pages:
    pages = ((index, pywikibot.Page(blib.site, page))
      for page, index in blib.iter_pages(pages, startFrom, upTo))
  elif pagefile:
    lines = [x.strip() for x in codecs.open(pagefile, "r", "utf-8")]
    pages = ((index, pywikibot.Page(blib.site, page))
      for page, index in blib.iter_pages(lines, startFrom, upTo))
  elif refs:
    pages = blib.references(refs, startFrom, upTo, includelinks=True)
  else:
    pages = blib.cat_articles(cat, startFrom, upTo)
  for index, page in pages:
    pagetitle = unicode(page.title())
    if filter_pages and not re.search(filter_pages, pagetitle):
      blib.msg("Skipping %s because doesn't match --filter-pages regex %s" % (
        pagetitle, filter_pages))
    else:
      if verbose:
        blib.msg("Processing %s" % pagetitle)
      blib.do_edit(page, index, rewrite_one_page, save=save, verbose=verbose)
def rewrite_ar_nisba(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_ar_nisba on every page referencing {{ar-nisba}}."""
  ref_pages = blib.references("Template:ar-nisba", startFrom, upTo)
  for idx, pg in ref_pages:
    blib.do_edit(pg, idx, rewrite_one_page_ar_nisba, save=save, verbose=verbose)
def delete_form(index, lemma, formind, formval, pos, tag_sets_to_delete,
    preserve_diaeresis, save, verbose, diff):
  """Delete one inflected form of `lemma` via delete_form_1.

  Skips form values containing links and form pages that don't exist.
  """
  def pagemsg(txt):
    msg("Page %s %s: form %s %s: %s" % (index, lemma, formind, formval, txt))
  # Guard: bracketed form values embed links and are not plain page names.
  if "[" in formval:
    pagemsg("Skipping form value %s with link in it" % formval)
    return
  target = pywikibot.Page(site, remove_macrons(formval, preserve_diaeresis))
  if not target.exists():
    pagemsg("Skipping form value %s, page doesn't exist" % formval)
    return
  def run_delete(pg, ind, parsed):
    return delete_form_1(pg, ind, lemma, formind, formval, pos,
      tag_sets_to_delete, preserve_diaeresis)
  blib.do_edit(target, index, run_delete, save=save, verbose=verbose, diff=diff)
def canonicalize_verb_form(save, startFrom, upTo, tempname, formarg):
  """Canonicalize the form argument (formarg=) of `tempname` templates to Roman numerals.

  Returns (per page) the changed text along with a changelog message.

  BUG FIX vs. original: blib.references yields (index, page) -- as unpacked
  by the parallel copy of this function and every other reference loop in
  this file -- but this copy had the unpacking swapped as (page, index).
  """
  def canonicalize_one_page_verb_form(page, index, text):
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    actions_taken = []
    for template in text.filter_templates():
      if template.name == tempname:
        origtemp = unicode(template)
        form = getparam(template, formarg)
        if form:
          addparam(template, formarg, canonicalize_form(form))
        newtemp = unicode(template)
        if origtemp != newtemp:
          msg("Replacing %s with %s" % (origtemp, newtemp))
          if re.match("^[1I](-|$)", form):
            # Form I verbs: also record the two following (radical) params.
            actions_taken.append("form=%s (%s/%s)" % (form,
              getparam(template, str(1 + int(formarg))),
              getparam(template, str(2 + int(formarg)))))
          else:
            actions_taken.append("form=%s" % form)
    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
      tempname, formarg, '; '.join(actions_taken))
    if len(actions_taken) > 0:
      msg("Change log = %s" % changelog)
    return text, changelog
  for index, page in blib.references("Template:%s" % tempname, startFrom, upTo):
    blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save)
def do_nouns(poses, headtempls, save, startFrom, upTo):
  """Remove i3rab from all parameters of the given headword templates.

  poses: list of part-of-speech names; pages come from "Arabic <pos>s".
  headtempls: names of headword templates whose parameters get cleaned.

  BUG FIX vs. original: blib.cat_articles yields (index, page) -- as
  unpacked by the parallel copy of this function and every other category
  loop in this file -- but this copy had the unpacking swapped.
  """
  def do_one_page_noun(page, index, text):
    pagename = page.title()
    nouncount = 0
    nounids = []
    for template in text.filter_templates():
      if template.name in headtempls:
        nouncount += 1
        params_done = []
        entry = getparam(template, "1")
        for param in template.params:
          value = param.value
          newvalue = remove_i3rab(pagename, index, entry, unicode(value))
          if newvalue != value:
            param.value = newvalue
            params_done.append(unicode(param.name))
        if params_done:
          nounids.append("#%s %s %s (%s)" % (nouncount, template.name, entry,
            ", ".join(params_done)))
    return text, "Remove i3rab from params in %s" % (
      '; '.join(nounids))
  for pos in poses:
    for index, page in blib.cat_articles("Arabic %ss" % pos.lower(), startFrom, upTo):
      # NOTE(review): `verbose` is not a parameter here; presumably a
      # module-level global -- TODO confirm.
      blib.do_edit(page, index, do_one_page_noun, save=save, verbose=verbose)
def fix_tool_place_noun(save, verbose, startFrom, upTo):
  """In tool/place/instance noun templates: drop cap= if present, else add lc=1."""
  for template in ["ar-tool noun", "ar-noun of place", "ar-instance noun"]:
    # Fix the template refs. If cap= is present, remove it; else, add lc=.
    # NOTE(review): this nested def closes over the loop variable `template`
    # (classic late binding), but it is invoked synchronously via do_edit
    # within the same iteration, so the binding is correct when called.
    def fix_one_page_tool_place_noun(page, index, text):
      pagetitle = page.title()
      for t in text.filter_templates():
        if t.name == template:
          if getparam(t, "cap"):
            msg("Page %s %s: Template %s: Remove cap=" % (index, pagetitle, template))
            t.remove("cap")
          else:
            msg("Page %s %s: Template %s: Add lc=1" % (index, pagetitle, template))
            addparam(t, "lc", "1")
      changelog = "%s: If cap= is present, remove it, else add lc=" % template
      msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
      return text, changelog
    for index, page in blib.references("Template:" + template, startFrom, upTo):
      blib.do_edit(page, index, fix_one_page_tool_place_noun, save=save, verbose=verbose)
def rewrite_verb_headword(save, startFrom, upTo):
  """Run rewrite_one_page_verb_headword over all pages in [[Category:Arabic verbs]]."""
  categories = [u"Arabic verbs"]
  for category in categories:
    for idx, pg in blib.cat_articles(category, startFrom, upTo):
      blib.do_edit(pg, idx, rewrite_one_page_verb_headword, save=save)
def process_text_on_non_lemma_page(index, pagetitle, text):
  """For each {{superlative of|it|...}} on the page, edit the referenced lemma page."""
  global args
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  pagemsg("Processing")
  notes = []
  parsed = blib.parse_text(text)
  for templ in parsed.filter_templates():
    # Only Italian superlative-of templates are of interest.
    if tname(templ) != "superlative of":
      continue
    if getparam(templ, "1") != "it":
      continue
    lemma = getparam(templ, "2")
    def edit_lemma(pg, idx, parsed_page):
      return process_lemma_page(pg, idx, pagetitle)
    blib.do_edit(pywikibot.Page(site, lemma), index, edit_lemma,
      save=args.save, verbose=args.verbose, diff=args.diff)
def canonicalize_verb_form(save, startFrom, upTo, tempname, formarg):
  """Canonicalize the form argument (formarg=) of `tempname` templates to Roman numerals."""
  # Canonicalize the form in ar-conj.
  # Returns the changed text along with a changelog message.
  def canonicalize_one_page_verb_form(page, index, text):
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    actions_taken = []
    for template in text.filter_templates():
      if template.name == tempname:
        origtemp = unicode(template)
        form = getparam(template, formarg)
        if form:
          addparam(template, formarg, canonicalize_form(form))
        newtemp = unicode(template)
        if origtemp != newtemp:
          msg("Replacing %s with %s" % (origtemp, newtemp))
          if re.match("^[1I](-|$)", form):
            # Form I: also record the two params following formarg.
            actions_taken.append(
              "form=%s (%s/%s)" % (form,
                getparam(template, str(1 + int(formarg))),
                getparam(template, str(2 + int(formarg)))))
          else:
            actions_taken.append("form=%s" % form)
    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
      tempname, formarg, '; '.join(actions_taken))
    if len(actions_taken) > 0:
      msg("Change log = %s" % changelog)
    return text, changelog
  for index, page in blib.references("Template:%s" % tempname, startFrom, upTo):
    blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save)
def rewrite_ru_decl_adj(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_ru_decl_adj over all pages in [[Category:Russian adjectives]]."""
  categories = [u"Russian adjectives"]
  for category in categories:
    for idx, pg in blib.cat_articles(category, startFrom, upTo):
      blib.do_edit(pg, idx, rewrite_one_page_ru_decl_adj, save=save, verbose=verbose)
def process_headwords(save, verbose, startFrom, upTo):
  """Run process_one_page_headwords over all Arabic lemma and non-lemma pages."""
  def handle_page(page, index, text):
    return process_one_page_headwords(unicode(page.title()), index, text)
  categories = [u"Arabic lemmas", u"Arabic non-lemma forms"]
  for category in categories:
    for idx, pg in blib.cat_articles(category, startFrom, upTo):
      blib.do_edit(pg, idx, handle_page, save=save, verbose=verbose)
def rewrite_arz_headword(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_arz_headword over Egyptian Arabic adjective and noun pages."""
  categories = [u"Egyptian Arabic adjectives", "Egyptian Arabic nouns"]
  for category in categories:
    for idx, pg in blib.cat_articles(category, startFrom, upTo):
      blib.do_edit(pg, idx, rewrite_one_page_arz_headword, save=save, verbose=verbose)
def rewrite_idafa(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_idafa over pages referencing each Arabic declension template."""
  for templ in arabic_decl_templates:
    ref_pages = blib.references("Template:" + templ, startFrom, upTo)
    for idx, pg in ref_pages:
      blib.do_edit(pg, idx, rewrite_one_page_idafa, save=save, verbose=verbose)
def undo_greek_removal(save, verbose, direcfile, startFrom, upTo):
  """Undo previously-logged Greek template param removals, driven by a log file.

  direcfile lines look like:
    * [[PAGE]]: Removed PARAM=...: <nowiki>TEMPLATE</nowiki>
  For each entry, the current (param-removed) template on the page is
  replaced with the version that still has the param (minus sc=polytonic).
  """
  template_removals = []
  for line in codecs.open(direcfile, "r", encoding="utf-8"):
    line = line.strip()
    m = re.match(r"\* \[\[(.*?)]]: Removed (.*?)=.*?: <nowiki>(.*?)</nowiki>$", line)
    if not m:
      msg("WARNING: Unable to parse line: [%s]" % line)
    else:
      template_removals.append(m.groups())
  for current, index in blib.iter_pages(template_removals, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, removed_param, template_text = current
    def undo_one_page_greek_removal(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      template = blib.parse_text(template_text).filter_templates()[0]
      orig_template = unicode(template)
      # The restored target drops sc=polytonic if present.
      if getparam(template, "sc") == "polytonic":
        template.remove("sc")
      to_template = unicode(template)
      param_value = getparam(template, removed_param)
      template.remove(removed_param)
      # from_template is what the page should currently contain (param removed).
      from_template = unicode(template)
      text = unicode(text)
      found_orig_template = orig_template in text
      newtext = text.replace(from_template, to_template)
      changelog = ""
      if newtext == text:
        if not found_orig_template:
          pagemsg("WARNING: Unable to locate 'from' template when undoing Greek param removal: %s" % from_template)
        else:
          pagemsg("Original template found, taking no action")
      else:
        if found_orig_template:
          pagemsg("WARNING: Undid removal, but original template %s already present!"
            % orig_template)
        # Length sanity check: a larger delta means replace() hit multiple spots.
        if len(newtext) - len(text) != len(to_template) - len(from_template):
          pagemsg("WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s" % (
            from_template, to_template))
        changelog = "Undid removal of %s=%s in %s" % (removed_param, param_value, to_template)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog
    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, undo_one_page_greek_removal, save=save,
        verbose=verbose)
def undo_ru_auto_accent(save, verbose, direcfile, startFrom, upTo):
  """Undo previously-logged Russian auto-accenting edits, driven by a log file.

  direcfile lines look like:
    Page N PAGE: Replaced {{ORIG}} with {{REPL}}
  Only ux/usex/ru-ux/lang templates are undone, and only when the
  replacement appears in an example-line context (#*: ...).
  """
  template_removals = []
  for line in codecs.open(direcfile, "r", encoding="utf-8"):
    line = line.strip()
    m = re.search(r"^Page [0-9]+ (.*?): Replaced (\{\{.*?\}\}) with (\{\{.*?\}\})$", line)
    if not m:
      msg("WARNING: Unable to parse line: [%s]" % line)
    else:
      template_removals.append(m.groups())
  for current, index in blib.iter_pages(template_removals, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, orig_template, repl_template = current
    # Only undo usage-example/lang templates.
    if not re.search(r"^\{\{(ux|usex|ru-ux|lang)\|", orig_template):
      continue
    def undo_one_page_ru_auto_accent(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      text = unicode(text)
      # Require the replacement to occur on a quotation/example line.
      if not re.search("^#\*:* *%s" % re.escape(repl_template), text, re.M):
        return None, ""
      found_orig_template = orig_template in text
      newtext = text.replace(repl_template, orig_template)
      changelog = ""
      if newtext == text:
        if not found_orig_template:
          pagemsg("WARNING: Unable to locate 'repl' template when undoing Russian auto-accenting: %s" % repl_template)
        else:
          pagemsg("Original template found, taking no action")
      else:
        pagemsg("Replaced %s with %s" % (repl_template, orig_template))
        if found_orig_template:
          pagemsg("WARNING: Undid replacement, but original template %s already present!" % orig_template)
        # Length sanity check: a larger delta means replace() hit multiple spots.
        if len(newtext) - len(text) != len(orig_template) - len(repl_template):
          pagemsg("WARNING: Length mismatch when undoing Russian auto-accenting, may have matched multiple templates: orig=%s, repl=%s" % (
            orig_template, repl_template))
        changelog = "Undid auto-accenting (per Wikitiki89) of %s" % (orig_template)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog
    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, undo_one_page_ru_auto_accent, save=save,
        verbose=verbose)
def process_lemma(index, pagetitle, slots, program_args):
  """Process all inflected-form pages of a Latin lemma whose slots match `slots`.

  Scans the lemma page for {{la-conj}}/{{la-ndecl}}/{{la-adecl}}, generates
  all inflection forms, and runs process_page on each matching form's page.
  `slots` entries are either exact slot names or specs accepted by
  lalib.slot_matches_spec.
  """
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagetitle, txt))
  pagemsg("Processing")
  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, program_args.verbose)
  page = pywikibot.Page(site, pagetitle)
  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    tn = tname(t)
    pos = None
    if tn == "la-conj":
      pos = "verb"
    elif tn == "la-ndecl":
      pos = "noun"
    elif tn == "la-adecl":
      pos = "adj"
    if pos:
      args = lalib.generate_infl_forms(pos, unicode(t), errandpagemsg, expand_text)
      for slot in args:
        # A slot matches if it equals a spec exactly or satisfies the
        # spec-matching helper.
        matches = False
        for spec in slots:
          if spec == slot:
            matches = True
            break
          if lalib.slot_matches_spec(slot, spec):
            matches = True
            break
        if matches:
          # A slot's value may hold several comma-separated forms.
          for formpagename in re.split(",", args[slot]):
            if "[" in formpagename or "|" in formpagename:
              pagemsg("WARNING: Skipping page %s with links in it" % formpagename)
            else:
              formpagename = lalib.remove_macrons(formpagename)
              formpage = pywikibot.Page(site, formpagename)
              if not formpage.exists():
                pagemsg("WARNING: Form page %s doesn't exist, skipping" % formpagename)
              elif formpagename == pagetitle:
                pagemsg("WARNING: Skipping dictionary form")
              else:
                def do_process_page(page, index, parsed):
                  return process_page(index, page, program_args)
                blib.do_edit(formpage, index, do_process_page,
                  save=program_args.save, verbose=program_args.verbose,
                  diff=program_args.diff)
def rewrite_template_names(old, new, removelist, save, verbose, startFrom, upTo):
  """Rename template `old` to `new` on every referencing page, dropping any
  parameters listed in `removelist` from the renamed templates."""
  def rename_on_page(page, index, text):
    changes = []
    for templ in text.filter_templates():
      if templ.name != old:
        continue
      changes.append("rename {{temp|%s}} to {{temp|%s}}" % (old, new))
      templ.name = new
      for unwanted in removelist:
        if templ.has(unwanted):
          templ.remove(unwanted)
          changes.append("remove %s=" % unwanted)
    return text, '; '.join(changes)
  for idx, pg in blib.references("Template:%s" % old, startFrom, upTo):
    blib.do_edit(pg, idx, rename_on_page, save=save, verbose=verbose)
def restore_removed_pagehead(index, pagetitle, comment, oldrevid):
  """Re-attach a pre-L2-header page head that an Etymology-section edit dropped.

  Looks at the revision `oldrevid`; if it had text before the first ==...==
  header and the current page doesn't (or differs), prepends the old head.
  """
  global args
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  def errpagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
    errmsg("Page %s %s: %s" % (index, pagetitle, txt))
  pagemsg("Processing page with comment = %s" % comment)
  # Only edits whose comment indicates Etymology-section surgery are candidates.
  if re.search('(add|replace).*Etymology section', comment):
    page = pywikibot.Page(site, pagetitle)
    oldtext = page.getOldVersion(oldrevid)
    # Everything before the first level-2 header is the "pagehead".
    oldtext_pagehead = re.split("(^==[^=\n]+==\n)", oldtext, 0, re.M)[0]
    if oldtext_pagehead:
      newtext_pagehead = re.split("(^==[^=\n]+==\n)", page.text, 0, re.M)[0]
      if newtext_pagehead != oldtext_pagehead:
        if newtext_pagehead:
          errpagemsg(
            "WARNING: Something weird, old page has pagehead <%s> and new page has different pagehead <%s>" %
            (oldtext_pagehead, newtext_pagehead))
          return
        pagemsg("Adding old pagehead <%s> to new page" % oldtext_pagehead)
        pagetext = page.text
        newtext = oldtext_pagehead + pagetext
        def do_process_page(pg, ind, parsed):
          return newtext, [
            "Restore missing page head: %s" % oldtext_pagehead.strip()
          ]
        blib.do_edit(page, index, do_process_page, save=args.save,
          verbose=args.verbose, diff=args.diff)
def process_non_lemma_page(page, index):
  """For Latin comparative/superlative headwords, edit the positive-degree page.

  Scans for {{la-adj-comp}}/{{la-adj-sup}}; the pos= param names the
  positive-degree lemma whose page is then edited via process_lemma_page.
  """
  global args
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  pagemsg("Processing")
  text = unicode(page.text)
  parsed = blib.parse_text(text)
  for t in parsed.filter_templates():
    tn = tname(t)
    if tn in ["la-adj-comp", "la-adj-sup"]:
      # 1= is the comp/sup lemma itself; falls back to the page title.
      lemma = getparam(t, "1") or pagetitle
      pos = getparam(t, "pos")
      if pos:
        def do_process(page, index, parsed):
          return process_lemma_page(page, index, tn == "la-adj-comp", lemma)
        blib.do_edit(pywikibot.Page(site, lalib.remove_macrons(pos)), index,
          do_process, save=args.save, verbose=args.verbose, diff=args.diff)
      else:
        pagemsg("WARNING: Didn't see positive degree: %s" % unicode(t))
def process_page(index, page, save, verbose, diff):
  """For each {{la-conj}} on the page, fix nom-sg-n of the supine-derived participle.

  Generates the verb's forms, derives the participle page name from the
  accusative supine (-um -> -us), and runs correct_nom_sg_n_participle on it.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagetitle, txt))
  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)
  pagemsg("Processing")
  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    if tname(t) == "la-conj":
      args = lalib.generate_verb_forms(unicode(t), errandpagemsg, expand_text)
      supforms = args.get("sup_acc", "")
      if supforms:
        # Multiple supine forms are comma-separated.
        supforms = supforms.split(",")
        for supform in supforms:
          # -um (supine) -> -us (masc. participle / non-impersonal part).
          non_impers_part = re.sub("um$", "us", supform)
          pagemsg(
            "Line to delete: part %s allbutnomsgn {{la-adecl|%s}}" % (
              non_impers_part, non_impers_part))
          def do_correct_nom_sg_n_participle(page, index, parsed):
            return correct_nom_sg_n_participle(
              page, index, supform, args["1s_pres_actv_indc"])
          blib.do_edit(pywikibot.Page(site, lalib.remove_macrons(supform)),
            index, do_correct_nom_sg_n_participle, save=save, verbose=verbose,
            diff=diff)
def process_page(index, pos, lemma, subs, infl, save, verbose):
  """Generate all inflected forms of `lemma` and run process_form on each form page."""
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, lemma, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, lemma, txt))
  def expand_text(tempcall):
    return blib.expand_text(tempcall, remove_macrons(lemma), pagemsg, verbose)
  pagemsg("Processing")
  args = lalib.generate_infl_forms(pos, infl, errandpagemsg, expand_text)
  if args is None:
    return
  forms_to_delete = []
  # Each slot value may hold several comma-separated forms.
  for key, form in args.iteritems():
    forms_to_delete.extend(form.split(","))
  for formind, form in blib.iter_items(forms_to_delete):
    def handler(page, formind, parsed):
      return process_form(index, page, lemma, formind, form, subs)
    blib.do_edit(pywikibot.Page(site, remove_macrons(form)), formind, handler,
      save=save, verbose=verbose)
def delete_form(index, lemma, formind, formval, lang, save, verbose, diff):
  """Delete one inflected form of `lemma` via delete_form_1.

  Skips form values containing links and form pages that don't exist.
  """
  def pagemsg(txt):
    msg("Page %s %s: form %s %s: %s" % (index, lemma, formind, formval, txt))
  # Guard: bracketed form values embed links and are not plain page names.
  if "[" in formval:
    pagemsg("Skipping form value %s with link in it" % formval)
    return
  target = pywikibot.Page(site, formval)
  if not target.exists():
    pagemsg("Skipping form value %s, page doesn't exist" % formval)
    return
  def run_delete(pg, ind, parsed):
    return delete_form_1(pg, ind, lemma, formind, formval, lang)
  blib.do_edit(target, index, run_delete, save=save, verbose=verbose, diff=diff)
def do_verbs(save, startFrom, upTo):
  """Remove i3rab from verbal nouns (vn=) in {{ar-conj}} templates.

  A trailing "?" on vn= marks the verbal noun as uncertain and is preserved.

  BUG FIX vs. original: blib.cat_articles yields (index, page) -- as
  unpacked by the parallel copy of this function and every other category
  loop in this file -- but this copy had the unpacking swapped.
  """
  def do_one_page_verb(page, index, text):
    pagename = page.title()
    verbcount = 0
    verbids = []
    for template in text.filter_templates():
      if template.name == "ar-conj":
        verbcount += 1
        vnvalue = getparam(template, "vn")
        uncertain = False
        if vnvalue.endswith("?"):
          vnvalue = vnvalue[:-1]
          msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
            index, pagename))
          uncertain = True
        if not vnvalue:
          continue
        # Split on either ASCII or Arabic comma.
        vns = re.split(u"[,،]", vnvalue)
        form = getparam(template, "1")
        verbid = "#%s form %s" % (verbcount, form)
        if re.match("^[1I](-|$)", form):
          # Form I verbs: include the two radical params in the id.
          verbid += " (%s,%s)" % (getparam(template, "2"), getparam(template, "3"))
        no_i3rab_vns = []
        for vn in vns:
          no_i3rab_vns.append(remove_i3rab(pagename, index, verbid, vn))
        newvn = ",".join(no_i3rab_vns)
        if uncertain:
          newvn += "?"
        if newvn != vnvalue:
          msg("Page %s %s: Verb %s, replacing %s with %s" % (
            index, pagename, verbid, vnvalue, newvn))
          addparam(template, "vn", newvn)
          verbids.append(verbid)
    return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
      ', '.join(verbids))
  for index, page in blib.cat_articles("Arabic verbs", startFrom, upTo):
    # NOTE(review): `verbose` is not a parameter here; presumably a
    # module-level global -- TODO confirm.
    blib.do_edit(page, index, do_one_page_verb, save=save, verbose=verbose)
def do_verbs(save, startFrom, upTo):
  """Remove i3rab from verbal nouns (vn=) in {{ar-conj}} templates.

  A trailing "?" on vn= marks the verbal noun as uncertain and is preserved.
  """
  def do_one_page_verb(page, index, text):
    pagename = page.title()
    verbcount = 0
    verbids = []
    for template in text.filter_templates():
      if template.name == "ar-conj":
        verbcount += 1
        vnvalue = getparam(template, "vn")
        uncertain = False
        if vnvalue.endswith("?"):
          vnvalue = vnvalue[:-1]
          msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
            index, pagename))
          uncertain = True
        if not vnvalue:
          continue
        # Split on either ASCII or Arabic comma.
        vns = re.split(u"[,،]", vnvalue)
        form = getparam(template, "1")
        verbid = "#%s form %s" % (verbcount, form)
        if re.match("^[1I](-|$)", form):
          # Form I verbs: include the two radical params in the id.
          verbid += " (%s,%s)" % (getparam(template, "2"), getparam(template, "3"))
        no_i3rab_vns = []
        for vn in vns:
          no_i3rab_vns.append(remove_i3rab(pagename, index, verbid, vn))
        newvn = ",".join(no_i3rab_vns)
        if uncertain:
          newvn += "?"
        if newvn != vnvalue:
          msg("Page %s %s: Verb %s, replacing %s with %s" % (
            index, pagename, verbid, vnvalue, newvn))
          addparam(template, "vn", newvn)
          verbids.append(verbid)
    return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
      ', '.join(verbids))
  for index, page in blib.cat_articles("Arabic verbs", startFrom, upTo):
    # NOTE(review): `verbose` is not a parameter here; presumably a
    # module-level global -- TODO confirm.
    blib.do_edit(page, index, do_one_page_verb, save=save, verbose=verbose)
def search_category_for_missing_form(form, pos, templates, save, startFrom, upTo):
  """Reformat raw Arabic headword lines in [[Category:Arabic <form>s]] into templates.

  For each page in the category, converts {{head|ar|...}} headwords and bare
  bold/linked headwords (with optional transliteration, gender and trailing
  inflections) into the first template in `templates`, then removes the
  now-redundant category link from the page.
  """
  if not isinstance(templates, list):
    templates = [templates]
  cat = "Arabic %ss" % form
  repltemplate = templates[0]
  msg("---Searching [[Category:%s|%s]] for %s:---" % (
    cat, cat, ' or '.join(["{{temp|%s}}" % temp for temp in templates])))

  def parse_infls(infltext, tr):
    """Parse a comma-separated run of inflections ({{l|ar|...}} (tr) {{g|...}})
    into |f=/|pl=/|fpl= (and matching |...tr=) argument text."""
    fs = []
    ftrs = []
    pls = []
    pltrs = []
    fpls = []
    fpltrs = []
    for rawinfl in re.split(", *", infltext):
      if not rawinfl:
        continue
      infl = re.match("'*\{\{(?:lang|l)\|ar\|(.*?)\}\}'* *(?:(?:\{\{IPAchar\|)?\((.*?)\)(?:\}\})?)? *\{\{g\|(.*?)\}\}", rawinfl)
      if not infl:
        msg("WARNING: Unable to match infl-outside-head %s" % rawinfl)
        continue
      msg("Found infl outside head: %s" % infl.group(0))
      if "|" in infl.group(1):
        msg("WARNING: Found | in head, skipping: %s" % infl.group(1))
        continue
      # Bucket by gender/number marker: f (feminine), p (plural),
      # f-p (feminine plural).
      if infl.group(3) == "f":
        fs.append(infl.group(1))
        ftrs.append(infl.group(2))
      elif infl.group(3) == "p":
        pls.append(infl.group(1))
        pltrs.append(infl.group(2))
      elif infl.group(3) == "f-p":
        fpls.append(infl.group(1))
        fpltrs.append(infl.group(2))
      else:
        msg("WARNING: Unrecognized inflection gender '%s'" % infl.group(3))
    infls = ""
    if tr:
      infls += "|tr=%s" % tr
    def handle_infls(infls, arabic, latin, argname):
      # First value gets the bare arg name; later ones get numbered names
      # (f, f2, f3, ... and ftr, f2tr, ...).
      count = 1
      for ar in arabic:
        if count == 1:
          arg = argname
        else:
          arg = "%s%s" % (argname, count)
        infls += "|%s=%s" % (arg, ar)
        if latin[count - 1] != None:
          if count == 1:
            larg = "%str" % argname
          else:
            larg = "%s%str" % (argname, count)
          infls += "|%s=%s" % (larg, latin[count - 1])
        count += 1
      return infls
    infls = handle_infls(infls, fs, ftrs, "f")
    infls = handle_infls(infls, pls, pltrs, "pl")
    infls = handle_infls(infls, fpls, fpltrs, "fpl")
    return infls

  def remove_empty_args(templ):
    """Strip empty positional/named args left behind by the format strings."""
    templ = re.sub(r"\|+\}", "}", templ)
    templ = re.sub(r"\|\|+([A-Za-z0-9_]+=)", r"|\1", templ)
    return templ

  def correct_one_page_headword_formatting(page, index, text):
    text = unicode(text)
    pagetitle = page.title()
    sawtemp = False
    for temp in templates:
      if "{{%s" % temp in text:
        sawtemp = True
    if not sawtemp:
      if "{{head|ar|" in text:
        msg("* %s not in {{l|ar|%s}} but {{temp|head|ar}} is" % (
          ' or '.join(templates), pagetitle))
      else:
        msg("* %s not in {{l|ar|%s}}, nor {{temp|head|ar}}" % (
          ' or '.join(templates), pagetitle))
    replsfound = 0
    # Pass 1: {{head|ar|...}}-style headwords under a ===POS=== header.
    for m in re.finditer(r'(===+%s===+\s*)\{\{head\|ar\|(?:sc=Arab\|)?%s((?:\|[A-Za-z0-9_]+=(?:\[[^\]]*\]|[^|}])*)*)\}\} *(?:(?:\{\{IPAchar\|)?\((.*?)\)(?:\}\})?)? *((?:,[^,\n]*)*)(.*)' % (pos, form), text, re.I):
      replsfound += 1
      msg("Found match: %s" % m.group(0))
      if m.group(5):
        msg("WARNING: Trailing text %s" % m.group(5))
      head = ""
      g = ""
      tr = None
      for infl in re.finditer(r"\|([A-Za-z0-9_]+)=((?:\[[^\]]*\]|[^|}])*)", m.group(2)):
        msg("Found infl within head: %s" % infl.group(0))
        if infl.group(1) == "head":
          head = infl.group(2).replace("'", "")
        elif infl.group(1) == "g":
          g = infl.group(2).replace("'", "")
        elif infl.group(1) == "tr":
          tr = infl.group(2)
        elif infl.group(1) == "sc":
          pass
        else:
          msg("WARNING: Unrecognized argument '%s'" % infl.group(1))
      if m.group(3):
        tr = m.group(3)
      infls = parse_infls(m.group(4), tr)
      repl = "{{%s|%s|%s%s}}" % (repltemplate, head, g, infls)
      repl = remove_empty_args(repl)
      repl = m.group(1) + repl + m.group(5) # Include leading, trailing text
      msg("Replacing\n%s\nwith\n%s" % (m.group(0), repl))
      newtext = text.replace(m.group(0), repl, 1)
      if newtext == text:
        msg("WARNING: Unable to do replacement")
      else:
        text = newtext
    # Pass 2: bare {{l|ar|...}}/{{lang|ar|...}} or '''bold''' headwords.
    for m in re.finditer(r"(===+%s===+\s*)(?:'*\{\{(?:lang|l)\|ar\|(.*?)\}\}'*|'+([^{}']+)'+) *(?:(?:\{\{IPAchar\|)?\((.*?)\)(?:\}\})?)? *(?:\{\{g\|(.*?)\}\})? *((?:,[^,\n]*)*)(.*)" % pos, text, re.I):
      replsfound += 1
      msg("Found match: %s" % m.group(0))
      if m.group(7):
        msg("WARNING: Trailing text %s" % m.group(7))
      head = m.group(2) or m.group(3)
      g = m.group(5) or ""
      tr = m.group(4)
      infls = parse_infls(m.group(6), tr)
      repl = "{{%s|%s|%s%s}}" % (repltemplate, head, g, infls)
      repl = remove_empty_args(repl)
      repl = m.group(1) + repl + m.group(7) # Include leading, trailing text
      msg("Replacing\n%s\nwith\n%s" % (m.group(0), repl))
      newtext = text.replace(m.group(0), repl, 1)
      if newtext == text:
        msg("WARNING: Unable to do replacement")
      else:
        text = newtext
    # If there's a blank line before and after the category, leave a single
    # blank line
    newtext, nsubs = \
      re.subn(r"\n\n\[\[Category:%s\]\]\n\n" % cat, "\n\n", text, 1)
    if nsubs == 0:
      newtext = re.sub(r"\[\[Category:%s\]\]\n?" % cat, "", text, 1)
    if newtext != text:
      msg("Removed [[Category:%s]]" % cat)
      text = newtext
    else:
      msg("WARNING: Unable to remove [[Category:%s]]" % cat)
    if not sawtemp and replsfound == 0:
      msg("WARNING: No replacements found for {{l|ar|%s}}" % pagetitle)
    return text, "Correct headword formatting for [[:Category:%s]]" % cat

  for index, page in blib.cat_articles(cat, startFrom, upTo):
    blib.do_edit(page, index, correct_one_page_headword_formatting, save=save)
def rewrite_verb_headword(save, startFrom, upTo):
  """Run rewrite_one_page_verb_headword over all pages in [[Category:Arabic verbs]].

  BUG FIX vs. original: blib.cat_articles yields (index, page) -- as
  unpacked by the parallel copy of this function and every other category
  loop in this file -- but this copy had the unpacking swapped.
  """
  for cat in [u"Arabic verbs"]:
    for index, page in blib.cat_articles(cat, startFrom, upTo):
      blib.do_edit(page, index, rewrite_one_page_verb_headword, save=save)
def clean_verb_headword(save, startFrom, upTo):
  """Run clean_one_page_verb_headword over all pages in [[Category:Arabic verbs]]."""
  categories = [u"Arabic verbs"]
  for category in categories:
    for idx, pg in blib.cat_articles(category, startFrom, upTo):
      blib.do_edit(pg, idx, clean_one_page_verb_headword, save=save)
t.add("1", "hu") # Put remaining parameters in order. for name, value, showkey in params: if re.search("^[0-9]+$", name): t.add(str(int(name) + 1), value, showkey=showkey, preserve_spacing=False) else: t.add(name, value, showkey=showkey, preserve_spacing=False) blib.set_template_name(t, "affix") notes.append("convert {{hu-suffix}} to {{affix}}") if unicode(t) != origt: pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t))) return unicode(parsed), notes parser = blib.create_argparser("Clean up {{hu-suffix}}") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for template in ["hu-suffix"]: msg("Processing references to Template:%s" % template) for i, page in blib.references("Template:%s" % template, start, end): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
def correct_link_formatting(save, startFrom, upTo):
  """Run correct_one_page_link_formatting over all Arabic lemma and non-lemma pages."""
  categories = [u"Arabic lemmas", u"Arabic non-lemma forms"]
  for category in categories:
    for idx, pg in blib.cat_articles(category, startFrom, upTo):
      blib.do_edit(pg, idx, correct_one_page_link_formatting, save=save)
def rewrite_ar_plural(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_ar_plural over all pages in [[Category:Arabic plurals]].

  BUG FIX vs. original: blib.cat_articles yields (index, page) -- as
  unpacked by every other category loop in this file -- but this function
  had the unpacking swapped as (page, index).
  """
  for cat in [u"Arabic plurals"]:
    for index, page in blib.cat_articles(cat, startFrom, upTo):
      blib.do_edit(page, index, rewrite_one_page_ar_plural, save=save, verbose=verbose)
return "test_infer" for pagetext in test_templates: text = blib.parse_text(pagetext) page = Page() newtext, comment = infer_one_page_decls(page, 1, text) msg("newtext = %s" % unicode(newtext)) msg("comment = %s" % comment) parser = blib.create_argparser("Add pronunciation sections to Russian Wiktionary entries") parser.add_argument('--mockup', action="store_true", help="Use mocked-up test code") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) mockup = args.mockup def ignore_page(page): if not isinstance(page, basestring): page = unicode(page.title()) if re.search(r"^(Appendix|Appendix talk|User|User talk|Talk):", page): return True return False if mockup: test_infer() else: for tempname in decl_templates: for index, page in blib.references("Template:" + tempname, start, end): if ignore_page(page): msg("Page %s %s: Skipping due to namespace" % (index, unicode(page.title()))) else: blib.do_edit(page, index, infer_one_page_decls, save=args.save)
def undo_greek_removal(save, verbose, direcfile, startFrom, upTo):
  # Undo previously-logged removals of Greek template parameters.
  # direcfile is a log file whose lines look like
  #   * [[PAGE]]: Removed PARAM=...: <nowiki>{{TEMPLATE}}</nowiki>
  # For each parsed line we re-insert the removed parameter by replacing
  # the "after-removal" form of the template with the "before-removal"
  # form on the page.
  template_removals = []
  for line in codecs.open(direcfile, "r", encoding="utf-8"):
    line = line.strip()
    m = re.match(
      r"\* \[\[(.*?)]]: Removed (.*?)=.*?: <nowiki>(.*?)</nowiki>$",
      line)
    if not m:
      msg("WARNING: Unable to parse line: [%s]" % line)
    else:
      # m.groups() is (pagename, removed_param, template_text).
      template_removals.append(m.groups())
  for current, index in blib.iter_pages(
      template_removals, startFrom, upTo,
      # key is the page name
      key=lambda x: x[0]):
    pagename, removed_param, template_text = current
    def undo_one_page_greek_removal(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      template = blib.parse_text(template_text).filter_templates()[0]
      orig_template = unicode(template)
      # sc=polytonic was removed separately; drop it from the target
      # form so we restore the template without it.
      if getparam(template, "sc") == "polytonic":
        template.remove("sc")
      to_template = unicode(template)
      param_value = getparam(template, removed_param)
      # Removing the logged param reconstructs the template as it
      # currently appears on the page (the "from" form).
      template.remove(removed_param)
      from_template = unicode(template)
      text = unicode(text)
      found_orig_template = orig_template in text
      newtext = text.replace(from_template, to_template)
      changelog = ""
      if newtext == text:
        if not found_orig_template:
          pagemsg(
            "WARNING: Unable to locate 'from' template when undoing Greek param removal: %s"
            % from_template)
        else:
          pagemsg("Original template found, taking no action")
      else:
        if found_orig_template:
          pagemsg(
            "WARNING: Undid removal, but original template %s already present!"
            % orig_template)
        # str.replace() replaces every occurrence; a length mismatch
        # means more than one template matched.
        if len(newtext) - len(text) != len(to_template) - len(
            from_template):
          pagemsg(
            "WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s"
            % (from_template, to_template))
        changelog = "Undid removal of %s=%s in %s" % (
          removed_param, param_value, to_template)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog
    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, undo_one_page_greek_removal, save=save,
                   verbose=verbose)
def rewrite_ar_plural(save, verbose, startFrom, upTo):
  """Apply rewrite_one_page_ar_plural to each page in
  [[Category:Arabic plurals]]."""
  categories = [u"Arabic plurals"]
  for category in categories:
    # blib.cat_articles yields (index, page) pairs.
    for index, page in blib.cat_articles(category, startFrom, upTo):
      blib.do_edit(page, index, rewrite_one_page_ar_plural,
          save=save, verbose=verbose)
#!/usr/bin/env python #coding: utf-8 import blib, pywikibot, re, string, sys, codecs from blib import addparam import arabiclib def fix(page, index, text): for template in text.filter_templates(): if template.name in arabiclib.arabic_all_headword_templates: if template.has("head") and not template.has(1) and not template.has(2) and not template.has(3) and not template.has(4) and not template.has(5) and not template.has(6) and not template.has(7) and not template.has(8): head = unicode(template.get("head").value) template.remove("head") addparam(template, "head", head, before=template.params[0].name if len(template.params) > 0 else None) if template.params[0].name == "head": template.get("head").showkey = False return text, "ar headword: head= > 1=" startFrom, upTo = blib.parse_args() for index, page in blib.references(u"Template:tracking/ar-head/head", startFrom, upTo): blib.do_edit(page, index, fix)
msg("Retrieving pages from %s ..." % cat) errmsg("Retrieving pages from %s ..." % cat) for index, page in blib.cat_articles(cat, None, None): yield page.title() if params.ignore_lemma_non_lemma: pages_to_ignore = set(yield_lemma_non_lemma_page_titles()) else: pages_to_ignore = set() for category in yield_cats(): msg("Processing category %s ..." % category) errmsg("Processing category %s ..." % category) for index, page in blib.cat_articles(category, startFrom, upTo): if page.title() not in pages_to_ignore: blib.do_edit(page, index, remove_translit_one_page, save=params.save, verbose=params.verbose) pa = blib.init_argparser("Remove translit, sc= from hy, xcl, ka, el, grc templates") pa.add_argument("--langs", default="all", help="Languages to do, a comma-separated list or 'all'") pa.add_argument("--cattype", default="all", help="""Categories to examine ('all' or comma-separated list of 'translit', 'lemma', 'non-lemma'; default 'all')""") pa.add_argument("--ignore-lemma-non-lemma", action="store_true", help="""Ignore lemma and non-lemma pages (useful with '--cattype translit').""") pa.add_argument("--do-head", action="store_true", help="""Remove tr= in {{head|..}}""") params = pa.parse_args() startFrom, upTo = blib.parse_start_end(params.start, params.end) remove_translit(params, startFrom, upTo)
# Driver: either run the mocked-up tests (--mockup) or apply
# infer_one_page_decls to every page referencing the decl templates.
parser = blib.create_argparser(
  "Add pronunciation sections to Russian Wiktionary entries")
parser.add_argument('--mockup', action="store_true", help="Use mocked-up test code")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)
mockup = args.mockup

def ignore_page(page):
  # Return True for pages outside the main namespace (Appendix, User,
  # Talk, ...), which should not be edited.
  if not isinstance(page, basestring):
    page = unicode(page.title())
  if re.search(r"^(Appendix|Appendix talk|User|User talk|Talk):", page):
    return True
  return False

if mockup:
  test_infer()
else:
  for tempname in decl_templates:
    # blib.references yields (index, page) pairs.
    for index, page in blib.references("Template:" + tempname, start, end):
      if ignore_page(page):
        msg("Page %s %s: Skipping due to namespace" % (index, unicode(page.title())))
      else:
        blib.do_edit(page, index, infer_one_page_decls, save=args.save)
parser.add_argument("--comment", help="Comment to use when saving pages.", required=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) fulltext = codecs.open(args.textfile, "r", "utf-8").read() titles_and_text = re.split(r"\n\n\n\n+", fulltext) assert len(titles_and_text) % 2 == 0 title_and_text_pairs = [] for i in xrange(0, len(titles_and_text), 2): title_and_text_pairs.append((titles_and_text[i], titles_and_text[i + 1])) for i, (pagetitle, pagetext) in blib.iter_items(title_and_text_pairs, start, end, get_name=lambda x: x[0]): def handler(page, index, parsed): return process_page(page, index, pagetext, args.comment.decode('utf-8')) blib.do_edit(pywikibot.Page(site, pagetitle), i, handler, save=args.save, verbose=args.verbose)
def process_page(page, index):
  # For a Latin noun (or proper noun) page, expand the corresponding
  # {{la-generate-noun-forms}} template and edit each generated
  # non-lemma form page via process_form.
  global args
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)
  text = unicode(page.text)
  retval = lalib.find_latin_section(text, pagemsg)
  if retval is None:
    return
  sections, j, secbody, sectail, has_non_latin = retval
  parsed = blib.parse_text(secbody)
  # Locate exactly one {{la-noun}} or {{la-proper noun}}; bail on any
  # ambiguity (multiple of either kind, both kinds, or neither).
  saw_noun = None
  saw_proper_noun = None
  for t in parsed.filter_templates():
    tn = tname(t)
    if tn == "la-noun":
      if saw_noun:
        pagemsg(
          "WARNING: Saw multiple nouns %s and %s, not sure how to proceed, skipping"
          % (unicode(saw_noun), unicode(t)))
        return
      saw_noun = t
    elif tn == "la-proper noun":
      if saw_proper_noun:
        pagemsg(
          "WARNING: Saw multiple proper nouns %s and %s, not sure how to proceed, skipping"
          % (unicode(saw_proper_noun), unicode(t)))
        return
      saw_proper_noun = t
  if saw_noun and saw_proper_noun:
    pagemsg(
      "WARNING: Saw both noun and proper noun, can't correct header/headword"
    )
    return
  if not saw_noun and not saw_proper_noun:
    pagemsg(
      "WARNING: Saw neither noun nor proper noun, can't correct header/headword"
    )
    return
  pos = "pn" if saw_proper_noun else "n"
  ht = saw_proper_noun or saw_noun
  if getparam(ht, "indecl"):
    pagemsg("Noun is indeclinable, skipping: %s" % unicode(ht))
    return
  # Clone the headword template and rewrite it into a form-generation
  # call, stripping parameters the generator does not take.
  generate_template = blib.parse_text(unicode(ht)).filter_templates()[0]
  blib.set_template_name(generate_template, "la-generate-noun-forms")
  blib.remove_param_chain(generate_template, "lemma", "lemma")
  blib.remove_param_chain(generate_template, "m", "m")
  blib.remove_param_chain(generate_template, "f", "f")
  blib.remove_param_chain(generate_template, "g", "g")
  rmparam(generate_template, "type")
  rmparam(generate_template, "indecl")
  rmparam(generate_template, "id")
  rmparam(generate_template, "pos")
  result = expand_text(unicode(generate_template))
  if not result:
    pagemsg("WARNING: Error generating forms, skipping")
    return
  tempargs = blib.split_generate_args(result)
  # Collect (slot, form) pairs, skipping bracketed/multi-part forms,
  # forms identical to the lemma page, and macron-level duplicates.
  forms_seen = set()
  slots_and_forms_to_process = []
  for slot, formarg in tempargs.iteritems():
    forms = formarg.split(",")
    for form in forms:
      if "[" in form or "|" in form:
        continue
      form_no_macrons = lalib.remove_macrons(form)
      if form_no_macrons == pagetitle:
        continue
      if form_no_macrons in forms_seen:
        continue
      forms_seen.add(form_no_macrons)
      slots_and_forms_to_process.append((slot, form))
  # NOTE(review): the loop variable `index` below shadows the function
  # parameter of the same name; each form page gets its own item index.
  for index, (slot, form) in blib.iter_items(
      sorted(slots_and_forms_to_process, key=lambda x: lalib.remove_macrons(x[1]))):
    def handler(page, index, parsed):
      return process_form(page, index, slot, form, pos)
    blib.do_edit(pywikibot.Page(site, lalib.remove_macrons(form)), index,
                 handler, save=args.save, verbose=args.verbose,
                 diff=args.diff)
lineno = 0 for line in codecs.open(args.direcfile, "r", encoding="utf-8"): lineno += 1 line = line.strip() if line.startswith("#"): continue verb = blib.remove_links(re.sub("<.*?>", "", line)) verbs[verb] = line def do_process_page(page, index, parsed=None): pagetitle = unicode(page.title()) def pagemsg(txt): msg("Page %s %s: %s" % (index, pagetitle, txt)) pagetext = blib.safe_page_text(page, pagemsg) return process_text_on_page_for_full_conj(index, pagetitle, pagetext, verbs) page = pywikibot.Page(site, verb) blib.do_edit(page, lineno, do_process_page, save=args.save, verbose=args.verbose, diff=args.diff) elif args.mode == "generate": verbs = {} for line in codecs.open(args.direcfile, "r", encoding="utf-8"): line = line.strip() if line.startswith("#"): continue if " " not in line: errandmsg("WARNING: No space in line: %s" % line) continue verb, spec = line.split(" ", 1) verbs[verb] = spec def do_process_page(page, index): return process_page_for_generate(page, index, verbs) blib.do_pagefile_cats_refs(args, start, end, do_process_page) else:
from blib import getparam, rmparam, msg, site

def process_page(page, index, parsed):
  # Intended to redirect each ru-conj-* documentation page to
  # [[Module:ru-verb/documentation]]. The early `return` below
  # deliberately disables the edit: the script no longer applies.
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return
  # Unreachable while the guard above is in place.
  pagemsg("Processing")
  return "#REDIRECT [[Module:ru-verb/documentation]]", "redirect to [[Module:ru-verb/documentation]]"

parser = blib.create_argparser("Redirect ru-conj-* documentation pages")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)
# Conjugation-type suffixes whose documentation pages are visited.
types = ["7a", "7b", "8a", "8b", "9a", "9b", "10a", "10c", "11a", "11b",
  "12a", "12b", "13b", "14a", "14b", "14c", "15a", "16a", "16b",
  u"irreg-бежать", u"irreg-спать", u"irreg-хотеть", u"irreg-дать",
  u"irreg-есть", u"irreg-сыпать", u"irreg-лгать", u"irreg-мочь",
  u"irreg-слать", u"irreg-идти", u"irreg-ехать", u"irreg-минуть",
  u"irreg-живописать-миновать", u"irreg-лечь", u"irreg-зиждиться",
  u"irreg-клясть", u"irreg-слыхать-видать", u"irreg-стелить-стлать",
  u"irreg-быть", u"irreg-ссать-сцать", u"irreg-чтить",
  u"irreg-ошибиться", u"irreg-плескать", u"irreg-внимать",
  u"irreg-обязывать"]
for i, ty in blib.iter_items(types, start, end):
  template = "Template:ru-conj-%s/documentation" % ty
  blib.do_edit(pywikibot.Page(site, template), i, process_page,
               save=args.save, verbose=args.verbose, diff=args.diff)
def rewrite_ru_decl_adj(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_ru_decl_adj over every page in
  [[Category:Russian adjectives]].

  BUG FIX: blib.cat_articles() yields (index, page) tuples — see the
  identical loops elsewhere in this file — but this function unpacked
  them as (page, index), so do_edit() received its arguments swapped.
  """
  for cat in [u"Russian adjectives"]:
    for index, page in blib.cat_articles(cat, startFrom, upTo):
      blib.do_edit(page, index, rewrite_one_page_ru_decl_adj, save=save,
          verbose=verbose)
if newname == "he-noun form of" and newspecs: if name in ["p", "g", "n"]: name = "p" + name t.add(name, value, showkey=showkey, preserve_spacing=False) # Finally add nocap=1 if requested. if add_nocap: t.add("nocap", "1") if unicode(t) != origt: pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t))) text = unicode(parsed) return text, notes parser = blib.create_argparser("Clean up {{he-*}} templates") parser.add_argument('--move-dot', help="Move .= outside of template", action="store_true") parser.add_argument('--rename', help="Rename templates", action="store_true") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for template in all_he_form_of_templates: for i, page in blib.references("Template:%s" % template, start, end): blib.do_edit(page, i, lambda page, index, parsed: process_page(page, index, parsed, args.move_dot, args.rename), save=args.save, verbose=args.verbose )
notes.append("convert 3+ newlines to 2") return text, notes parser = blib.create_argparser("Add missing declension to Latin terms") parser.add_argument( "--direcfile", help="File of output directives from make_latin_missing_decl.py", required=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) lines = [x.rstrip('\n') for x in codecs.open(args.direcfile, "r", "utf-8")] for i, line in blib.iter_items(lines, start, end): m = re.search("^Page [0-9]+ (.*?): For noun (.*?), declension (.*?)$", line) if not m: msg("Unrecognized line, skipping: %s" % line) else: pagename, headword_template, decl_template = m.groups() def do_process_page(page, index, parsed): return process_page(page, index, headword_template, decl_template) blib.do_edit(pywikibot.Page(site, pagename), i, do_process_page, save=args.save, verbose=args.verbose, diff=args.diff)
def rewrite_idafa(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_idafa over every page referencing one of the
  Arabic declension templates.

  BUG FIX: blib.references() yields (index, page) tuples — the same
  swapped-unpack bug was fixed in fix_smp() in this file — but this
  function unpacked them as (page, index), so do_edit() received the
  page as the index and vice versa.
  """
  for template in arabic_decl_templates:
    for index, page in blib.references("Template:" + template, startFrom,
        upTo):
      blib.do_edit(page, index, rewrite_one_page_idafa, save=save,
          verbose=verbose)
def push_manual_changes(save, verbose, direcfile, annotation, startFrom, upTo):
  # Apply manually-reviewed template replacements listed in direcfile.
  # Each parseable line yields (pagename, repl_template, curr_template);
  # on the page, curr_template is replaced with repl_template.
  template_changes = []
  for line in codecs.open(direcfile, "r", encoding="utf-8"):
    line = line.strip()
    # Two supported line formats: plain "Page ..." output and the
    # wiki-list "<nowiki>...</nowiki>" variant.
    m = re.match(r"^Page [^ ]+ (.*?): .*?: (\{\{.*?\}\}) <- \{\{.*?\}\} \((\{\{.*?\}\})\)$", line)
    if not m:
      m = re.match(r"^\* (?:Page [^ ]+ )?\[\[(.*?)\]\]: .*?: <nowiki>(\{\{.*?\}\}) <- \{\{.*?\}\} \((\{\{.*?\}\})\)</nowiki>.*$", line)
      if not m:
        msg("WARNING: Unable to parse line: [%s]" % line)
        continue
    # Only record entries that actually change something.
    if m.group(2) != m.group(3):
      # If the current template is the same as the current template of the
      # previous entry, ignore the previous entry; otherwise we won't be
      # able to locate the current template the second time around. This
      # happens e.g. in the output of find_russian_need_vowels.py when
      # processing a template such as cardinalbox or compound that has
      # more than one foreign-language parameter in it.
      if len(template_changes) > 0 and template_changes[-1][2] == m.group(3):
        msg("Ignoring change for pagename %s, %s -> %s" % template_changes[-1])
        template_changes.pop()
      template_changes.append(m.groups())
  for current, index in blib.iter_pages(template_changes, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, repl_template, curr_template = current
    def push_one_manual_change(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      #template = blib.parse_text(template_text).filter_templates()[0]
      #orig_template = unicode(template)
      #if getparam(template, "sc") == "polytonic":
      #  template.remove("sc")
      #to_template = unicode(template)
      #param_value = getparam(template, removed_param)
      #template.remove(removed_param)
      #from_template = unicode(template)
      text = unicode(text)
      found_repl_template = repl_template in text
      newtext = text.replace(curr_template, repl_template)
      changelog = ""
      if newtext == text:
        if not found_repl_template:
          pagemsg("WARNING: Unable to locate current template: %s" % curr_template)
        else:
          pagemsg("Replacement template already found, taking no action")
      else:
        if found_repl_template:
          pagemsg("WARNING: Made change, but replacement template %s already present!" % repl_template)
        # str.replace() replaces every occurrence; compare the actual
        # length change against the expected single-replacement change
        # to detect multiple (or partial) matches.
        repl_curr_diff = len(repl_template) - len(curr_template)
        newtext_text_diff = len(newtext) - len(text)
        if newtext_text_diff == repl_curr_diff:
          pass
        else:
          ratio = float(newtext_text_diff) / repl_curr_diff
          if ratio == int(ratio):
            pagemsg("WARNING: Replaced %s occurrences of curr=%s with repl=%s" % (int(ratio), curr_template, repl_template))
          else:
            pagemsg("WARNING: Something wrong, length mismatch during replacement: Expected length change=%s, actual=%s, ratio=%.2f, curr=%s, repl=%s" % (repl_curr_diff, newtext_text_diff, ratio, curr_template, repl_template))
        changelog = "Replaced %s with %s (%s)" % (curr_template, repl_template, annotation)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog
    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, push_one_manual_change, save=save,
                   verbose=verbose)
def split_etymologies(save, verbose, startFrom, upTo):
  """Split etymology sections on every page in [[Category:Arabic lemmas]]."""
  # Bind `verbose` into the three-argument per-page callback that
  # blib.do_edit expects.
  splitter = lambda page, index, pagetext: \
      split_one_page_etymologies(page, index, pagetext, verbose)
  for index, page in blib.cat_articles("Arabic lemmas", startFrom, upTo):
    blib.do_edit(page, index, splitter, save=save, verbose=verbose)