def fix_smp(save, verbose, startFrom, upTo):
    """Change ``pl=smp`` to ``pl=sp`` in Arabic declension templates.

    For each template named in ``arabic_decl_templates``, every page
    yielded by ``blib.references`` is handed to ``blib.do_edit`` together
    with a per-page callback that rewrites the plural-type parameters.

    save, verbose -- forwarded unchanged to ``blib.do_edit``.
    startFrom, upTo -- forwarded unchanged to ``blib.references``.
    """

    def fix_one_page_smp(page, index, text):
        # Per-page callback: mutates the parsed text in place and returns
        # it together with a fixed changelog message.
        title = page.title()
        for tmpl in text.filter_templates():
            head = reorder_shadda(getparam(tmpl, "1"))
            if not tmpl.name.startswith("ar-decl-"):
                continue
            # Visit pl=, pl2=, pl3=, ... and stop at the first empty value.
            param, next_num = "pl", 2
            value = getparam(tmpl, param)
            while value:
                if value == "smp":
                    if head.endswith(TAM):
                        # Heads ending in TAM are only reported, not edited.
                        msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (
                            index, title, param, head, tmpl.name))
                    else:
                        msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (
                            index, title, param, param, tmpl.name))
                        addparam(tmpl, param, "sp")
                param = "pl%s" % next_num
                next_num += 1
                value = getparam(tmpl, param)
        changelog = "Change pl=smp to pl=sp"
        msg("Page %s %s: Change log = %s" % (index, title, changelog))
        return text, changelog

    for template in arabic_decl_templates:
        referencing = blib.references("Template:" + template, startFrom, upTo)
        for index, page in referencing:
            blib.do_edit(page, index, fix_one_page_smp, save=save,
                         verbose=verbose)
def canonicalize_verb_form(save, startFrom, upTo, tempname, formarg):
    """Canonicalize the verb-form parameter of template ``tempname``.

    Every page yielded by ``blib.references`` for ``Template:<tempname>``
    is passed to ``blib.do_edit`` with a callback that replaces the value
    of parameter ``formarg`` by ``canonicalize_form(value)``.

    save -- forwarded to ``blib.do_edit``.
    startFrom, upTo -- forwarded to ``blib.references``.
    tempname -- template name to look for (without ``Template:``).
    formarg -- name of the parameter holding the form; it is converted
        with ``int()`` when the two following parameters are logged.
    """

    def canonicalize_one_page_verb_form(page, index, text):
        # Returns the changed text along with a changelog message.
        pagetitle = page.title()
        msg("Processing page %s" % pagetitle)
        actions_taken = []
        for template in text.filter_templates():
            if template.name != tempname:
                continue
            origtemp = unicode(template)
            form = getparam(template, formarg)
            if form:
                addparam(template, formarg, canonicalize_form(form))
            newtemp = unicode(template)
            if origtemp != newtemp:
                msg("Replacing %s with %s" % (origtemp, newtemp))
            # NOTE(review): the form is recorded for every matching
            # template, changed or not -- confirm this was the intent.
            if re.match("^[1I](-|$)", form):
                # For form 1/I also log the two parameters after the form.
                actions_taken.append("form=%s (%s/%s)" % (
                    form,
                    getparam(template, str(1 + int(formarg))),
                    getparam(template, str(2 + int(formarg)))))
            else:
                actions_taken.append("form=%s" % form)
        changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
            tempname, formarg, '; '.join(actions_taken))
        if actions_taken:
            msg("Change log = %s" % changelog)
        return text, changelog

    # blib.references is unpacked as (index, page) here, matching the other
    # call sites in this code base; the previous (page, index) order handed
    # the index to do_edit as the page.
    for index, page in blib.references("Template:%s" % tempname, startFrom,
                                       upTo):
        blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save)
def rewrite_pages(refrom, reto, refs, cat, pages, pagefile, pagetitle_sub,
                  comment, filter_pages, save, verbose, startFrom, upTo):
    """Apply a list of regex substitutions to a set of pages.

    refrom, reto -- parallel sequences of regex patterns and replacement
        templates, applied in order with ``re.sub``.
    refs, cat, pages, pagefile -- page source; the first truthy one of
        ``pages``, ``pagefile``, ``refs`` is used, otherwise ``cat``.
    pagetitle_sub -- if truthy, every occurrence of this string in a
        pattern/replacement is replaced by the current page title.
    comment -- edit summary; when falsy a summary is built from the
        ``refrom``/``reto`` pairs.
    filter_pages -- if truthy, a regex; pages whose title does not match
        (``re.search``) are skipped.
    save, verbose -- forwarded to ``blib.do_edit``.
    startFrom, upTo -- forwarded to the ``blib`` page iterators.
    """

    def rewrite_one_page(page, index, text):
        text = reorder_shadda(unicode(text))
        fromto = list(zip(refrom, reto))
        if pagetitle_sub:
            pagetitle = unicode(page.title())
            # The title is literal text: escape it for the pattern, and
            # double backslashes for the replacement template so that
            # re.sub does not interpret them as escapes/group references.
            pattern_title = re.escape(pagetitle)
            repl_title = pagetitle.replace("\\", "\\\\")
        for fromval, toval in fromto:
            if pagetitle_sub:
                fromval = fromval.replace(pagetitle_sub, pattern_title)
                toval = toval.replace(pagetitle_sub, repl_title)
            text = re.sub(fromval, toval, text)
        return text, comment or "replace %s" % (
            ", ".join("%s -> %s" % (f, t) for f, t in fromto))

    def make_pages(titles):
        # Turn an iterable of titles into (Page, index) pairs.
        return ((pywikibot.Page(blib.site, title), index)
                for title, index in blib.iter_pages(titles, startFrom, upTo))

    # NOTE(review): this function consumes (page, index) pairs from every
    # blib iterator, whereas other callers of blib.references and
    # blib.cat_articles unpack (index, page) -- confirm which order the
    # blib version in use actually yields.
    if pages:
        page_iter = make_pages(pages)
    elif pagefile:
        # Read everything up front so the file handle is closed promptly.
        with codecs.open(pagefile, "r", "utf-8") as infile:
            lines = [x.strip() for x in infile]
        page_iter = make_pages(lines)
    elif refs:
        page_iter = blib.references(refs, startFrom, upTo, includelinks=True)
    else:
        page_iter = blib.cat_articles(cat, startFrom, upTo)

    for page, index in page_iter:
        pagetitle = unicode(page.title())
        if filter_pages and not re.search(filter_pages, pagetitle):
            blib.msg("Skipping %s because doesn't match --filter-pages regex %s" %
                     (pagetitle, filter_pages))
            continue
        if verbose:
            blib.msg("Processing %s" % pagetitle)
        blib.do_edit(page, index, rewrite_one_page, save=save, verbose=verbose)
def fix_smp(save, verbose, startFrom, upTo):
    """Change ``pl=smp`` to ``pl=sp`` in Arabic declension templates.

    Iterates over the pages that reference each template listed in
    ``arabic_decl_templates`` and edits them through ``blib.do_edit``.

    save, verbose -- forwarded to ``blib.do_edit``.
    startFrom, upTo -- forwarded to ``blib.references``.
    """
    for template in arabic_decl_templates:
        # Rewrite pl=smp, pl2=smp, ... to sp in every ar-decl-* template,
        # except where the head ends in TAM (only a warning is logged).
        def fix_one_page_smp(page, index, text):
            pagetitle = page.title()
            for t in text.filter_templates():
                head = reorder_shadda(getparam(t, "1"))
                if t.name.startswith("ar-decl-"):
                    param = "pl"
                    pl = getparam(t, param)
                    i = 2
                    # Walk pl=, pl2=, pl3=, ... until one is empty.
                    while pl:
                        if pl == "smp":
                            if head.endswith(TAM):
                                msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (
                                    index, pagetitle, param, head, t.name))
                            else:
                                msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (
                                    index, pagetitle, param, param, t.name))
                                addparam(t, param, "sp")
                        param = "pl%s" % i
                        pl = getparam(t, param)
                        i += 1
            changelog = "Change pl=smp to pl=sp"
            msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
            return text, changelog

        # blib.references is unpacked as (index, page), as at the other
        # call sites in this code base; the old (page, index) order passed
        # the index to do_edit in place of the page.
        for index, page in blib.references("Template:" + template,
                                           startFrom, upTo):
            blib.do_edit(page, index, fix_one_page_smp, save=save,
                         verbose=verbose)
def fix_tool_place_noun(save, verbose, startFrom, upTo):
    """Drop ``cap=`` from, or add ``lc=1`` to, three Arabic noun templates.

    Handles ``ar-tool noun``, ``ar-noun of place`` and ``ar-instance noun``
    one after the other, editing every referencing page via ``blib.do_edit``.

    save, verbose -- forwarded to ``blib.do_edit``.
    startFrom, upTo -- forwarded to ``blib.references``.
    """
    for template in ["ar-tool noun", "ar-noun of place", "ar-instance noun"]:

        def fix_one_page_tool_place_noun(page, index, text):
            # A truthy cap= is removed; otherwise lc=1 is added.
            title = page.title()
            for tmpl in text.filter_templates():
                if tmpl.name != template:
                    continue
                if getparam(tmpl, "cap"):
                    msg("Page %s %s: Template %s: Remove cap=" %
                        (index, title, template))
                    tmpl.remove("cap")
                else:
                    msg("Page %s %s: Template %s: Add lc=1" %
                        (index, title, template))
                    addparam(tmpl, "lc", "1")
            changelog = "%s: If cap= is present, remove it, else add lc=" % template
            msg("Page %s %s: Change log = %s" % (index, title, changelog))
            return text, changelog

        referencing = blib.references("Template:" + template, startFrom, upTo)
        for index, page in referencing:
            blib.do_edit(page, index, fix_one_page_tool_place_noun,
                         save=save, verbose=verbose)
def rewrite_ar_nisba(save, verbose, startFrom, upTo):
    """Run ``rewrite_one_page_ar_nisba`` on every page referencing ar-nisba.

    save, verbose -- forwarded to ``blib.do_edit``.
    startFrom, upTo -- forwarded to ``blib.references``.
    """
    referencing = blib.references("Template:ar-nisba", startFrom, upTo)
    for index, page in referencing:
        blib.do_edit(page, index, rewrite_one_page_ar_nisba,
                     save=save, verbose=verbose)
def canonicalize_verb_form(save, startFrom, upTo, tempname, formarg):
    """Rewrite the form parameter of ``tempname`` via ``canonicalize_form``.

    Each page referencing ``Template:<tempname>`` is edited through
    ``blib.do_edit``; the per-page callback returns the text and a
    changelog message.

    save -- forwarded to ``blib.do_edit``.
    startFrom, upTo -- forwarded to ``blib.references``.
    tempname -- name of the template whose parameter is canonicalized.
    formarg -- name of the parameter holding the verb form.
    """

    def canonicalize_one_page_verb_form(page, index, text):
        msg("Processing page %s" % page.title())
        actions_taken = []
        for template in text.filter_templates():
            if template.name != tempname:
                continue
            before = unicode(template)
            form = getparam(template, formarg)
            if form:
                addparam(template, formarg, canonicalize_form(form))
            after = unicode(template)
            if before != after:
                msg("Replacing %s with %s" % (before, after))
            # NOTE(review): the entry below is added for every matching
            # template, changed or not -- confirm this was the intent.
            if re.match("^[1I](-|$)", form):
                # Form 1/I: also show the two parameters following formarg.
                first = getparam(template, str(1 + int(formarg)))
                second = getparam(template, str(2 + int(formarg)))
                entry = "form=%s (%s/%s)" % (form, first, second)
            else:
                entry = "form=%s" % form
            actions_taken.append(entry)
        changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
            tempname, formarg, '; '.join(actions_taken))
        if actions_taken:
            msg("Change log = %s" % changelog)
        return text, changelog

    target = "Template:%s" % tempname
    for index, page in blib.references(target, startFrom, upTo):
        blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save)
def rewrite_idafa(save, verbose, startFrom, upTo):
    """Run ``rewrite_one_page_idafa`` on pages using Arabic decl templates.

    Every template named in ``arabic_decl_templates`` is looked up with
    ``blib.references`` and each referencing page is edited in turn.

    save, verbose -- forwarded to ``blib.do_edit``.
    startFrom, upTo -- forwarded to ``blib.references``.
    """
    for template in arabic_decl_templates:
        target = "Template:" + template
        for index, page in blib.references(target, startFrom, upTo):
            blib.do_edit(page, index, rewrite_one_page_idafa,
                         save=save, verbose=verbose)
def yield_pages():
    """Yield ``(index, page, topage)`` for every configured page source.

    The sources are read from the enclosing scope and are all processed,
    in this order: ``pages``, ``pagefile``, ``from_to_pagefile``, ``refs``,
    ``pages_and_refs``, ``cats``.  ``topage`` is ``None`` except for
    ``from_to_pagefile`` entries, where it is the text after `` ||| ``.
    """

    def read_stripped_lines(path):
        # Read the whole file eagerly so the handle is closed before the
        # generator is suspended at a yield.
        with codecs.open(path, "r", "utf-8") as infile:
            return [x.strip() for x in infile]

    if pages:
        for index, page in blib.iter_items(pages, startFrom, upTo):
            yield index, pywikibot.Page(blib.site, page), None
    if pagefile:
        lines = read_stripped_lines(pagefile)
        for index, page in blib.iter_items(lines, startFrom, upTo):
            yield index, pywikibot.Page(blib.site, page), None
    if from_to_pagefile:
        lines = read_stripped_lines(from_to_pagefile)
        for index, line in blib.iter_items(lines, startFrom, upTo):
            parts = line.split(" ||| ")
            # Exactly one separator is required; lines with none or with
            # several are reported and skipped instead of crashing on the
            # tuple unpacking.
            if len(parts) != 2:
                msg("WARNING: Saw bad line in --from-to-pagefile: %s" % line)
                continue
            frompage, topage = parts
            yield index, pywikibot.Page(blib.site, frompage), topage
    if refs:
        for ref in refs:
            for index, page in blib.references(
                    ref, startFrom, upTo, only_template_inclusion=False):
                yield index, page, None
    if pages_and_refs:
        for page_and_refs in pages_and_refs:
            for index, page in blib.references(
                    page_and_refs, startFrom, upTo,
                    only_template_inclusion=False, include_page=True):
                yield index, page, None
    if cats:
        for cat in cats:
            for index, page in blib.cat_articles(cat, startFrom, upTo):
                yield index, page, None
def process_page(page, index):
    """Walk the template-namespace references to ``page``.

    Reads the module-level ``args``.  With ``args.table_of_uses`` set, a
    single comma-separated line (page title followed by the titles of the
    referencing pages, each with ``Template:`` stripped) is printed;
    otherwise ``process_subpage`` is called for every referencing page.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errpagemsg(txt):
        errmsg("Page %s %s: %s" % (index, pagetitle, txt))

    table_only = args.table_of_uses
    errpagemsg("Processing references")
    if not table_only:
        pagemsg("Processing references")

    aliases = []
    # namespaces=[10] is passed through to blib.references as-is.
    subpages = blib.references(pagetitle, namespaces=[10],
                               only_template_inclusion=False,
                               filter_redirects=args.redirects_only)
    for i, subpage in subpages:
        aliases.append(unicode(subpage.title()))
        if not args.table_of_uses:
            process_subpage(page, index, subpage, i)

    if args.table_of_uses:
        names = [pagetitle.replace("Template:", "")]
        names.extend(x.replace("Template:", "") for x in aliases)
        msg(",".join(names))
def rewrite_template_names(old, new, removelist, save, verbose, startFrom, upTo):
    """Rename template ``old`` to ``new`` on all pages that reference it.

    On each renamed template, every parameter named in ``removelist`` that
    is present is removed as well.  The edit summary lists the actions.

    save, verbose -- forwarded to ``blib.do_edit``.
    startFrom, upTo -- forwarded to ``blib.references``.
    """

    def rewrite_one_page_template_names(page, index, text):
        actions = []
        for template in text.filter_templates():
            if template.name != old:
                continue
            actions.append("rename {{temp|%s}} to {{temp|%s}}" % (old, new))
            template.name = new
            for param in removelist:
                if not template.has(param):
                    continue
                template.remove(param)
                actions.append("remove %s=" % param)
        summary = '; '.join(actions)
        return text, summary

    target = "Template:%s" % old
    for index, page in blib.references(target, startFrom, upTo):
        blib.do_edit(page, index, rewrite_one_page_template_names,
                     save=save, verbose=verbose)
def create_declensions(save, pos, tempname, decltempname, sgnum, startFrom,
                       upTo, removeparams, is_proper=False):
    """Call ``create_declension`` for every page referencing ``tempname``.

    All arguments other than ``startFrom``/``upTo`` (which bound the
    ``blib.references`` iteration) are passed straight through to
    ``create_declension``.
    """
    target = "Template:%s" % tempname
    for index, page in blib.references(target, startFrom, upTo):
        create_declension(page, index, save, pos, tempname, decltempname,
                          sgnum, removeparams, is_proper=is_proper)
import rulib as ru


def process_page(index, page):
    """Log a message if ``page`` has no ``ru-adj`` headword template.

    When the headword is missing, the message also lists any Russian noun
    headword templates and ``head`` templates (with their 2= value) found
    on the page.  Nothing is returned and the page is not modified.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    templates = blib.parse(page).filter_templates()

    if any(unicode(t.name) in ["ru-adj"] for t in templates):
        return

    notes = []
    for t in templates:
        name = unicode(t.name)
        if name in ["ru-noun", "ru-noun+", "ru-proper noun", "ru-proper noun+"]:
            notes.append("found noun header (%s)" % name)
        if name == "head":
            notes.append("found head header (%s)" % getparam(t, "2"))
    # Use a real conditional: the old ``notes and ...`` idiom evaluated to
    # the empty list itself and printed a literal "[]" after the message.
    suffix = "; " + ",".join(notes) if notes else ""
    pagemsg("Missing adj headword template%s" % suffix)


parser = blib.create_argparser("Find missing adjective headwords")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for index, page in blib.references("Template:ru-decl-adj", start, end):
    process_page(index, page)
def process_page(index, page, save, verbose):
    """Move ``{{was wotd|...}}`` from before ``==English==`` to after it.

    Only a ``{{was wotd|...}}`` line immediately followed by the
    ``==English==`` line is affected.  The page is saved only when
    ``save`` is true; otherwise the would-be edit summary is logged.
    With ``verbose``, the full old and new text are logged as well.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    origtext = unicode(page.text)
    # Swap the two captured lines: header first, then the template.
    text = re.sub(r"(\{\{was wotd\|.*?\}\}\n)(==English==\n)", r"\2\1",
                  origtext)
    if text == origtext:
        return

    if verbose:
        pagemsg("Replacing <%s> with <%s>" % (origtext, text))
    comment = "put {{was wotd}} after ==English== per [[User:Smuconlaw]]"
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)


# The description used to be a leftover from an unrelated ru-ux script.
parser = blib.create_argparser(u"Move {{was wotd}} after ==English==")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.references("Template:was wotd", start, end):
    process_page(i, page, args.save, args.verbose)
if shch == u"щ": t.add("3", getparam(t, "3") + shch) rmparam(t, "4") notes.append(u"move param 4 (щ) to param 3") elif shch: pagemsg("WARNING: Strange value %s for param 4" % shch) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Convert class-4a 4th param щ to 3rd param") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.references("Template:tracking/ru-verb/conj-4a", start, end): process_page(i, page, args.save, args.verbose)
else: ru_proper_noun_changed = 1 return unicode(parsed), ru_noun_table_cleaned, ru_noun_table_link_copied, ru_noun_changed, ru_proper_noun_changed parser = blib.create_argparser("Copy the declension in ru-noun-table to ru-noun+, preserving any m=, f=, g=, etc. in the latter.") parser.add_argument('--cats', default="nouns,proper nouns", help="Categories to do ('nouns', 'proper nouns' or 'nouns,proper nouns')") parser.add_argument('--lemma-file', help="File containing lemmas to copy declension of. Will remove extraneous params from ru-noun-table and copy links to ru-noun-table regardless of this.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) if args.lemma_file: lemmas = set([x.strip() for x in codecs.open(args.lemma_file, "r", "utf-8")]) else: lemmas = None for cat in re.split(",", args.cats): if cat == "nouns": template = "Template:ru-noun+" elif cat == "proper nouns": template = "Template:ru-proper noun+" else: raise ValueError("Invalid value to --cats: %s" % cat) msg("Processing references to %s" % template) if lemmas: for i, page in blib.iter_items(lemmas, start, end): process_page(i, pywikibot.Page(site, page), args.save, args.verbose, lemmas) else: for i, page in blib.references(template, start, end): process_page(i, page, args.save, args.verbose, lemmas)
for t in parsed.filter_templates(): if tname(t) == "R:Lexico": origt = unicode(t) rmparam(t, "lang") entry_uk = getparam(t, "entry_uk") if entry_uk: t.add("entry", entry_uk, before="entry_uk") rmparam(t, "entry_uk") url_uk = getparam(t, "url_uk") if url_uk: t.add("url", url_uk, before="url_uk") rmparam(t, "url_uk") p4 = getparam(t, "4") if p4: t.add("text", p4, before="4") rmparam(t, "4") newt = unicode(t) if origt != newt: notes.append("Remove/rearrange params in {{R:Lexico}}") pagemsg("Replaced %s with %s" % (origt, newt)) return parsed, notes parser = blib.create_argparser(u"Remove/rearrange params in {{R:Lexico}}") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.references("Template:R:Lexico", start, end): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
arg_set.append(val) for t in parsed.filter_templates(): tname = unicode(t.name) if tname == "ru-decl-noun-see": pagemsg("WARNING: Skipping ru-decl-noun-see, can't handle yet: %s" % unicode(t)) elif tname in ["ru-noun+", "ru-proper noun+"]: pagemsg("Found %s" % unicode(t)) process_new_style_headword(t) elif tname in ["ru-noun", "ru-proper noun"]: pagemsg("WARNING: Skipping ru-noun or ru-proper noun, can't handle yet: %s" % unicode(t)) parser = blib.create_argparser(u"Find red links in multiword lemmas") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) msg("Reading Russian lemmas") for i, page in blib.cat_articles("Russian lemmas", start, end): lemmas.add(unicode(page.title())) for pos in ["nouns", "proper nouns"]: tracking_page = "Template:tracking/ru-headword/space-in-headword/" + pos msg("PROCESSING REFERENCES TO: %s" % tracking_page) for index, page in blib.references(tracking_page, start, end): process_page(index, page, args.verbose) for lemma, nonexistent_msg in sorted(nonexistent_lemmas.items(), key=lambda pair:(-lemma_count[pair[0]], pair[0])): msg("* [[%s]] (%s occurrence%s): %s (refs: %s)" % (lemma, lemma_count[lemma], "" if lemma_count[lemma] == 1 else "s", nonexistent_msg, ", ".join("[[%s]]" % x for x in nonexistent_lemmas_refs[lemma])))
newval = re.sub("^#\* #\* ", "#* ", subsections[j], 0, re.M) if newval != subsections[j]: notes.append("remove double #* prefix") pagemsg("Removed double #* prefix") subsections[j] = newval newtext = "".join(subsections) if text != newtext: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, newtext)) assert notes comment = "; ".join(blib.group_notes(notes)) if save: pagemsg("Saving with comment = %s" % comment) page.text = newtext page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) if __name__ == "__main__": parser = blib.create_argparser("Fix old cite/quote/reference templates") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for template in replace_templates: msg("Processing references to Template:%s" % template) errmsg("Processing references to Template:%s" % template) for i, page in blib.references("Template:%s" % template, start, end, includelinks=True): process_page(i, page, args.save, args.verbose)
def rewrite_idafa(save, verbose, startFrom, upTo):
    """Run ``rewrite_one_page_idafa`` on pages using Arabic decl templates.

    save, verbose -- forwarded to ``blib.do_edit``.
    startFrom, upTo -- forwarded to ``blib.references``.
    """
    for template in arabic_decl_templates:
        # blib.references is unpacked as (index, page), matching the other
        # call sites in this code base; the old (page, index) order passed
        # the index to do_edit in place of the page.
        for index, page in blib.references("Template:" + template,
                                           startFrom, upTo):
            blib.do_edit(page, index, rewrite_one_page_idafa, save=save,
                         verbose=verbose)
pagemsg("Replacing %s with %s" % (origt, unicode(t))) if t.has("past_actv_part") and getparam(t, "past_actv_part") == "": notes.append("set past_actv_part=-") origt = unicode(t) t.add("past_actv_part", "-") pagemsg("Replacing %s with %s" % (origt, unicode(t))) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) if not notes: pagemsg("WARNING: No changes") parser = blib.create_argparser(u"Fix past_adv_part_short to use dash instead of blank") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.references("Template:tracking/ru-verb/different-conj", start, end): process_page(i, page, args.save, args.verbose)
def pagemsg(txt): msg("Page %s %s: %s" % (index, pagetitle, txt)) pagemsg("Processing") notes = [] for t in parsed.filter_templates(): if tname(t) == "quote-Fanny Hill": origt = unicode(t) t.name = "RQ:Cleland Fanny Hill" rmparam(t, "part") if getparam(t, "1"): t.add("passage", getparam(t, "1")) rmparam(t, "1") notes.append( "Replace {{quote-Fanny Hill}} with {{RQ:Cleland Fanny Hill}}") newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return parsed, notes parser = blib.create_argparser( u"Convert {{quote-Fanny Hill}} to {{RQ:Cleland Fanny Hill}}") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.references("Template:quote-Fanny Hill", start, end): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
origt = unicode(t) head = getparam(t, "head") rmparam(t, "head") tr = getparam(t, "tr") rmparam(t, "tr") t.add("1", head) if tr: t.add("tr", tr) pagemsg("Replacing %s with %s" % (origt, unicode(t))) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Fix ru-phrase templates to use 1= instead of head=") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.references("Template:ru-phrase", start, end): process_page(i, page, args.save, args.verbose)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

# List the titles in a category (--cat) or referencing a page (--ref),
# sorted by their reversed spelling.
parser = blib.create_argparser(u"List pages in category or references in Zaliznyak order")
parser.add_argument('--cat', help="Category to list")
parser.add_argument('--ref', help="References to list")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Without this check a missing --cat/--ref fell through to
# blib.references(None, ...).
# NOTE(review): assumes create_argparser returns an argparse.ArgumentParser.
if not args.cat and not args.ref:
    parser.error("One of --cat or --ref must be specified")

if args.cat:
    pages_to_list = blib.cat_articles(args.cat, start, end)
else:
    pages_to_list = blib.references(args.ref, start, end)

pages = [unicode(page.title()) for i, page in pages_to_list]

# The sort key is the reversed title, so titles are grouped by ending.
for page in sorted(pages, key=lambda x: x[::-1]):
    msg(page)
help="Categories to do (can be comma-separated list)") parser.add_argument('--refs', help="References to do (can be comma-separated list)") parser.add_argument('--lemmafile', help="File of lemmas to process. May have accents.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) if args.lemmafile: lemmas = [] for i, pagename in blib.iter_items([ ru.remove_accents(x.strip()) for x in codecs.open(args.lemmafile, "r", "utf-8") ]): page = pywikibot.Page(site, pagename) process_page(i, page, args.verbose) elif args.refs: for ref in re.split(",", args.refs): msg("Processing references to: %s" % ref) for i, page in blib.references(ref, start, end): process_page(i, page, args.verbose) else: for cat in re.split(",", args.cats): msg("Processing category: %s" % cat) lemmas = [] if cat == "Russian verbs": for i, page in blib.cat_articles(cat): lemmas.append(page.title()) for i, page in blib.cat_articles(cat, start, end): process_page(i, page, args.verbose)
# Print a wikitable with one row per template listed in args.tempfile.
# Each line of that file is "canonical,alias1,alias2,..." (no "Template:"
# prefix); the canonical template and every alias get a row of their own.
ref_namespaces = (args.ref_namespaces.decode("utf-8")
                  if args.ref_namespaces else None)

# Read the file eagerly so the handle is closed before any network work.
with codecs.open(args.tempfile, "r", "utf-8") as tempfile_handle:
    lines = [x.strip() for x in tempfile_handle]

msg('{|class="wikitable"')
msg("! Aliased template !! Canonical template !! #Uses%s%s" % (
    " !! Refs" if args.include_refs else "",
    " !! Suggested disposition" if args.include_disposition else ""))

for ref_and_aliases in lines:
    if not ref_and_aliases:
        # A blank line would otherwise be looked up as the page "Template:".
        continue
    split_refs = re.split(",", ref_and_aliases)
    mainref = "Template:%s" % split_refs[0]
    # First entry is the canonical template itself (no canonical target),
    # followed by one entry per alias pointing at the canonical template.
    refs = [(mainref, None)]
    refs.extend(("Template:%s" % alias, mainref) for alias in split_refs[1:])

    for template, canonical in refs:
        errmsg("Processing references to: %s" % template)
        template_refs = list(
            blib.references(template, start, end, namespaces=ref_namespaces))
        num_refs = len(template_refs)
        if canonical:
            alias_cell = "[[%s]]" % template
            canonical_cell = "[[%s]]" % canonical
        else:
            # The canonical template is shown in bold in both columns.
            alias_cell = "'''[[%s]]'''" % template
            canonical_cell = "'''[[%s]]'''" % template
        if args.include_refs:
            refs_cell = " || %s" % ", ".join(
                "[[%s]]" % unicode(ref.title()) for i, ref in template_refs)
        else:
            refs_cell = ""
        msg("|-")
        msg("| %s || %s || %s%s%s" % (
            alias_cell, canonical_cell, num_refs, refs_cell,
            " || ?" if args.include_disposition else ""))

msg("|}")
parser = blib.create_argparser(
    "Add pronunciation sections to Russian Wiktionary entries")
parser.add_argument('--mockup', action="store_true",
                    help="Use mocked-up test code")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)
mockup = args.mockup


def ignore_page(page):
    """Return True if ``page`` is in a namespace that must be skipped.

    ``page`` is either a title string or an object whose ``title()`` is
    used.  Appendix, User and Talk pages (and the two "... talk" variants
    listed in the regex) are ignored.
    """
    title = page if isinstance(page, basestring) else unicode(page.title())
    skip = re.search(r"^(Appendix|Appendix talk|User|User talk|Talk):", title)
    return bool(skip)


if mockup:
    # Exercise the inference code on canned input instead of live pages.
    test_infer()
else:
    for tempname in decl_templates:
        referencing = blib.references("Template:" + tempname, start, end)
        for index, page in referencing:
            if ignore_page(page):
                msg("Page %s %s: Skipping due to namespace" %
                    (index, unicode(page.title())))
                continue
            blib.do_edit(page, index, infer_one_page_decls, save=args.save)
proper_noun_headword.params.extend(remaining_params) pagemsg("Replacing %s with %s" % (orig_proper_noun_headword, unicode(proper_noun_headword))) newtext = unicode(parsed) newtext = re.sub(r"\n\n\n*\[\[Category:ru:Names]]\n\n\n*", "\n\n", newtext) newtext = re.sub(r"\[\[Category:ru:Names]]\n", "", newtext) newtext = re.sub(r"(\{\{surname\|.*)\.\n", r"\1\n", newtext) if newtext != text: if verbose: pagemsg("Replacing <<%s>> with <<%s>>" % (text, newtext)) comment = "Convert ru-adj11 to ru-decl-adj and fix up associated templates" if save: pagemsg("Saving with comment = %s" % comment) page.text = newtext page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) else: pagemsg("Skipping") parser = blib.create_argparser("Fix uses of ru-adj11") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for temp in ["ru-adj11"]: msg("Processing references to Template:%s" % temp) for i, page in blib.references("Template:" + temp, start, end): process_page(i, page, args.save, args.verbose)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

# Log one "Processing" line per page, taken either from the references to
# the pages in --refs or from the categories in --cats.
parser = blib.create_argparser(u"List pages, lemmas and/or non-lemmas")
parser.add_argument('--cats', default="Russian lemmas",
                    help="Categories to do (can be comma-separated list)")
parser.add_argument('--refs',
                    help="References to do (can be comma-separated list)")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# --refs takes precedence; --cats is only consulted when --refs is absent.
if args.refs:
    header = "Processing references to: %s"
    names = re.split(",", args.refs)
    lister = blib.references
else:
    header = "Processing category: %s"
    names = re.split(",", args.cats)
    lister = blib.cat_articles

for name in names:
    msg(header % name)
    for i, page in lister(name, start, end):
        msg("Page %s %s: Processing" % (i, unicode(page.title())))
for i in xrange(1, 6): if not t.has(str(i)): t.add(str(i), "") t.add("6", param7) notes.append("move type 7b arg7 -> arg6") newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Fix up class 6a arg 6 -> 4, class 7b arg 7 -> 6") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for vclass in ["6a", "7b"]: for i, page in blib.references("Template:tracking/ru-verb/conj-%s" % vclass, start, end): process_page(i, page, args.save, args.verbose)
pval = unicode(param.value) if pname == "inline": if pval and pval not in ["0", "n", "no", "false"]: tname = "uxi" elif re.search(r"^[0-9]+$", pname): # move numbered params up by one new_params.append((str(1 + int(pname)), param.value)) elif pname == "sub": new_params.append(("subst", param.value)) else: new_params.append((pname, param.value)) del t.params[:] t.name = tname t.add("1", "ru") for pname, pval in new_params: t.add(pname, pval) notes.append("Replace {{ru-ux}} with {{%s|ru}}" % tname) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return parsed, notes parser = blib.create_argparser(u"Convert {{ru-ux}} to {{ux|ru}} or {{uxi|ru}}") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.references("Template:ru-ux", start, end): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
return "test_infer" for pagetext in test_templates: text = blib.parse_text(pagetext) page = Page() newtext, comment = infer_one_page_decls(page, 1, text) msg("newtext = %s" % unicode(newtext)) msg("comment = %s" % comment) parser = blib.create_argparser("Add pronunciation sections to Russian Wiktionary entries") parser.add_argument('--mockup', action="store_true", help="Use mocked-up test code") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) mockup = args.mockup def ignore_page(page): if not isinstance(page, basestring): page = unicode(page.title()) if re.search(r"^(Appendix|Appendix talk|User|User talk|Talk):", page): return True return False if mockup: test_infer() else: for tempname in decl_templates: for index, page in blib.references("Template:" + tempname, start, end): if ignore_page(page): msg("Page %s %s: Skipping due to namespace" % (index, unicode(page.title()))) else: blib.do_edit(page, index, infer_one_page_decls, save=args.save)
# Put numbered params in order. for name, value, showkey in numbered_params: t.add(name, value, showkey=showkey, preserve_spacing=False) t.add("volume", volume) if chapter: t.add("chapter", chapter) if text: t.add("text", text) if translation: t.add("t", translation) # Put named params in order. for name, value, showkey in named_params: t.add(name, value, showkey=showkey, preserve_spacing=False) notes.append( "Replace {{RQ:Don Quixote}} with {{RQ:Cervantes Viardot Don Quichotte}}" ) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return parsed, notes parser = blib.create_argparser( u"Convert {{RQ:Don Quixote}} to {{RQ:Cervantes Viardot Don Quichotte}}") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.references("Template:RQ:Don Quixote", start, end): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
elif tname in ["ru-verb"]: pagemsg("Found %s" % unicode(t)) process_verb_headword(t) elif tname in ["ru-noun", "ru-proper noun"]: pagemsg( "WARNING: Skipping ru-noun or ru-proper noun, can't handle yet: %s" % unicode(t)) parser = blib.create_argparser(u"Find red links in multiword lemmas") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) msg("Reading Russian lemmas") for i, page in blib.cat_articles("Russian lemmas", start, end): lemmas.add(unicode(page.title())) for pos in ["nouns", "proper nouns", "verbs"]: tracking_page = "Template:tracking/ru-headword/space-in-headword/" + pos msg("PROCESSING REFERENCES TO: %s" % tracking_page) for index, page in blib.references(tracking_page, start, end): process_page(index, page, args.verbose) for lemma, nonexistent_msg in sorted(nonexistent_lemmas.items(), key=lambda pair: (-lemma_count[pair[0]], pair[0])): msg("* [[%s]] (%s occurrence%s): %s (refs: %s)" % (lemma, lemma_count[lemma], "" if lemma_count[lemma] == 1 else "s", nonexistent_msg, ", ".join("[[%s]]" % x for x in nonexistent_lemmas_refs[lemma])))
changed = origt != unicode(t) if changed: notes.append("quote-poem -> quote-book with fixed params") if changed: pagemsg("Replacing %s with %s" % (origt, unicode(t))) return parsed, notes parser = blib.create_argparser( "quote-poem -> quote-book with changed params; quote-magazine/quote-news -> quote-journal; quote-Don Quixote -> RQ:Don Quixote" ) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for template in [ "quote-poem", "quote-magazine", "quote-news", "quote-Don Quixote" ]: msg("Processing references to Template:%s" % template) errmsg("Processing references to Template:%s" % template) for i, page in blib.references("Template:%s" % template, start, end, includelinks=True): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

# Null-save every page in a category (--cat) or referencing a page (--ref).
parser = blib.create_argparser(u"Purge (null-save) pages in category or references")
parser.add_argument('--cat', help="Category to purge")
parser.add_argument('--ref', help="References to purge")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Without this check a missing --cat/--ref fell through to
# blib.references(None, ...).
# NOTE(review): assumes create_argparser returns an argparse.ArgumentParser.
if not args.cat and not args.ref:
    parser.error("One of --cat or --ref must be specified")

if args.cat:
    pages_to_list = blib.cat_articles(args.cat, start, end)
else:
    pages_to_list = blib.references(args.ref, start, end)

for i, page in pages_to_list:
    # msg("Page %s %s: Null-saving" % (i, unicode(page.title())))
    page.save(comment="null save")
# but it's the default in ru-noun-table unless the lemma is plural. # So remove n=both, generate the arguments, and see if the actual # value of args.n is b (for "both"); if not, set n=both. else: assert headword_n == "b" rmparam(see_template, "n") see_generate_template = re.sub(r"^\{\{ru-noun-table", "{{ru-generate-noun-args", unicode(see_template)) see_generate_result = expand_text(see_generate_template) if not see_generate_result: pagemsg("WARNING: Error generating ru-noun-table args") return None see_args = ru.split_generate_args(see_generate_result) if see_args["n"] != "b": see_template.add("n", "both") comment = "Replace ru-decl-noun-see with ru-noun-table, taken from headword template (%s)" % unicode(headword_template.name) if save: pagemsg("Saving with comment = %s" % comment) page.text = unicode(parsed) page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser("Convert ru-decl-noun-see into ru-noun-table decl template, taken from headword ru-(proper )noun+ template") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for index, page in blib.references("Template:ru-decl-noun-see", start, end): process_page(index, page, args.save, args.verbose)
pagemsg("WARNING: Would add inanimacy to neuter, but isn't marked as indeclinable: %s" % origt) return pagemsg("Replacing %s with %s" % (origt, unicode(t))) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) if notes: comment = "Add inanimacy to neuters (%s)" % "; ".join(notes) else: comment = "Add inanimacy to neuters" if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser("Make neuter nouns be inanimate") parser.add_argument("--fix-indeclinable", action="store_true", help="Make non-indeclinables be indeclinable") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.references("Template:ru-noun", start, end): process_page(i, page, args.save, args.verbose, args.fix_indeclinable) for i, page in blib.references("Template:ru-proper noun", start, end): process_page(i, page, args.save, args.verbose, args.fix_indeclinable)
t.add("1", "hu") # Put remaining parameters in order. for name, value, showkey in params: if re.search("^[0-9]+$", name): t.add(str(int(name) + 1), value, showkey=showkey, preserve_spacing=False) else: t.add(name, value, showkey=showkey, preserve_spacing=False) blib.set_template_name(t, "affix") notes.append("convert {{hu-suffix}} to {{affix}}") if unicode(t) != origt: pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t))) return unicode(parsed), notes parser = blib.create_argparser("Clean up {{hu-suffix}}") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for template in ["hu-suffix"]: msg("Processing references to Template:%s" % template) for i, page in blib.references("Template:%s" % template, start, end): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
def pagemsg(txt): msg("Page %s %s: %s" % (index, pagetitle, txt)) pagemsg("Processing") parsed = blib.parse(page) found_headword_template = False for t in parsed.filter_templates(): if unicode(t.name) in ["ru-adj"]: found_headword_template = True if not found_headword_template: notes = [] for t in parsed.filter_templates(): if unicode(t.name) in [ "ru-noun", "ru-noun+", "ru-proper noun", "ru-proper noun+" ]: notes.append("found noun header (%s)" % unicode(t.name)) if unicode(t.name) == "head": notes.append("found head header (%s)" % getparam(t, "2")) pagemsg("Missing adj headword template%s" % (notes and "; " + ",".join(notes))) parser = blib.create_argparser("Find missing adjective headwords") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for index, page in blib.references("Template:ru-decl-adj", start, end): process_page(index, page)
if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(blib.group_notes(notes)) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Convert Japanese headwords from old-style to new-style") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) romaji_to_keep = set() for i, page in blib.cat_articles("Japanese terms with romaji needing attention"): pagetitle = unicode(page.title()) romaji_to_keep.add(pagetitle) for ref in ["ja-noun", "ja-adj", "ja-verb", "ja-pos"]: msg("Processing references to Template:%s" % ref) for i, page in blib.references("Template:%s" % ref, start, end): process_page(i, page, args.save, args.verbose, romaji_to_keep)
#!/usr/bin/env python
#coding: utf-8

import blib, pywikibot, re, string, sys, codecs
from blib import addparam
import arabiclib


def fix(page, index, text):
    """Move head= to the front of Arabic headword templates.

    Walks every template in ``text``. For templates whose name is listed in
    ``arabiclib.arabic_all_headword_templates`` and which carry ``head=`` but
    none of the parameters 1 through 8, the ``head`` parameter is removed and
    re-added ahead of the first remaining parameter. If ``head`` then sits
    first, its ``showkey`` flag is cleared.

    ``page`` and ``index`` are accepted but not used here.

    Returns a 2-tuple of ``text`` (edited in place) and the changelog string.
    """
    for tmpl in text.filter_templates():
        if tmpl.name not in arabiclib.arabic_all_headword_templates:
            continue
        if not tmpl.has("head"):
            continue
        # Leave the template alone if any of the slots 1..8 is already taken.
        if any(tmpl.has(slot) for slot in range(1, 9)):
            continue

        head_value = unicode(tmpl.get("head").value)
        tmpl.remove("head")

        # Re-insert ahead of whichever parameter is now first, if one remains.
        if len(tmpl.params) > 0:
            anchor = tmpl.params[0].name
        else:
            anchor = None
        addparam(tmpl, "head", head_value, before=anchor)

        # NOTE(review): clearing showkey presumably makes the value render as
        # an unnamed first parameter (the changelog says "head= > 1=") --
        # confirm against the template library in use.
        if tmpl.params[0].name == "head":
            tmpl.get("head").showkey = False

    return text, "ar headword: head= > 1="


startFrom, upTo = blib.parse_args()

# Process every page that references the tracking template.
for index, page in blib.references(u"Template:tracking/ar-head/head",
                                   startFrom, upTo):
    blib.do_edit(page, index, fix)
def yield_ref_pages():
    """Yield ``(index, page)`` pairs for each template in ``templates_to_do``.

    For every template name, ``blib.references`` is queried with the title
    ``"Template:" + name`` and the bounds ``pargs.start`` / ``pargs.end``;
    a falsy bound is passed as ``None``. Results are yielded in the order
    received, one template after another.
    """
    for tmpl_name in templates_to_do:
        title = "Template:" + tmpl_name
        # Falsy start/end values are normalized to None before the lookup.
        refs = blib.references(title, pargs.start or None, pargs.end or None)
        for idx, ref_page in refs:
            yield idx, ref_page
import arabiclib


def fix(page, index, text):
    """Reposition head= as the leading parameter of Arabic headword templates.

    Only templates named in ``arabiclib.arabic_all_headword_templates`` that
    have ``head=`` and lack every one of the parameters 1 through 8 are
    touched. For those, ``head`` is removed, then added back before the first
    remaining parameter (or with no anchor when none remain). When ``head``
    ends up first, its ``showkey`` flag is set to False.

    ``page`` and ``index`` are part of the callback signature but are not
    read by this function.

    Returns ``(text, changelog)``; ``text`` is the same object, modified in
    place.
    """
    headword_names = arabiclib.arabic_all_headword_templates
    for tmpl in text.filter_templates():
        # One combined skip test: not a headword template, no head=, or one
        # of the slots 1..8 is already occupied.
        if (tmpl.name not in headword_names
                or not tmpl.has("head")
                or any(tmpl.has(num) for num in range(1, 9))):
            continue

        head_text = unicode(tmpl.get("head").value)
        tmpl.remove("head")

        # Anchor on the parameter that is first after the removal, if any.
        first_name = tmpl.params[0].name if len(tmpl.params) > 0 else None
        addparam(tmpl, "head", head_text, before=first_name)

        # NOTE(review): hiding the key is presumably what turns head= into
        # an unnamed first argument -- verify with the parser library.
        if tmpl.params[0].name == "head":
            tmpl.get("head").showkey = False

    return text, "ar headword: head= > 1="


startFrom, upTo = blib.parse_args()

# Apply fix() to each page that references the tracking template.
for index, page in blib.references(u"Template:tracking/ar-head/head",
                                   startFrom, upTo):
    blib.do_edit(page, index, fix)