# NOTE(review): this chunk begins mid-function; the def header below is
# reconstructed from the do_pagefile_cats_refs call at the bottom -- confirm
# against the full file.  pagetitle comes from the missing function prefix.
def process_page(page, index, parsed):
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  notes = []
  for t in parsed.filter_templates():
    origt = unicode(t)
    tn = tname(t)
    if tn in ["diminutive of", "dim of"]:
      if t.has("pos"):
        # Singularize the part of speech (strip a trailing "s") and move it
        # from pos= to POS=, keeping its position in the template.
        pos = re.sub("s$", "", getparam(t, "pos"))
        t.add("POS", pos, before="pos")
        rmparam(t, "pos")
        notes.append("Convert plural pos= to singular POS= in {{%s}}" % tn)
    if unicode(t) != origt:
      pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))

  return unicode(parsed), notes

parser = blib.create_argparser(
  "Convert plural pos= to singular POS= in {{diminutive of}}",
  include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
if val: seenval = True if seenval: t.add(str(i + 1), val) t.add("1", conjtype) blib.sort_params(t) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Convert ru-conj-* to ru-conj and move variant") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.cat_articles("Russian verbs", start, end): process_page(i, page, args.save, args.verbose)
if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(blib.group_notes(notes)) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Convert Japanese headwords from old-style to new-style") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) romaji_to_keep = set() for i, page in blib.cat_articles("Japanese terms with romaji needing attention"): pagetitle = unicode(page.title()) romaji_to_keep.add(pagetitle) for ref in ["ja-noun", "ja-adj", "ja-verb", "ja-pos"]: msg("Processing references to Template:%s" % ref) for i, page in blib.references("Template:%s" % ref, start, end): process_page(i, page, args.save, args.verbose, romaji_to_keep)
# find_rfdef.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Find pages that need definitions among a set list (e.g. most frequent
# words).

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

parser = blib.create_argparser(u"Find pages that need definitions")
parser.add_argument("--pagefile", help="File containing pages to check")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# The set of page titles we care about, one per line in --pagefile.
pages_to_check = set(x.strip() for x in codecs.open(args.pagefile, "r", "utf-8"))

for i, page in blib.cat_articles("Russian entries needing definition", start, end):
  pagetitle = page.title()
  if pagetitle in pages_to_check:
    msg("* Page %s [[%s]]" % (i, pagetitle))
"impf", "impf-intr", "impf-refl", "impf-impers", "impf-intr-impers", "impf-refl-impers"]: conjtype = getparam(t, "1") t.add("2", conjtype) t.add("1", verbtype) notes.append("move verb type from arg 2 to arg 1") newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Move verb type from arg 2 to arg 1") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.cat_articles("Russian verbs", start, end): process_page(i, page, args.save, args.verbose)
# NOTE(review): chunk begins mid-function; the header below is reconstructed
# from the driver call at the bottom.  pagemsg comes from the missing prefix.
def process_page(index, page):
  text = unicode(page.text)
  foundrussian = False
  # Split into (separator, section) pairs on level-2 headers.
  sections = re.split("(^==[^=]*==\n)", text, 0, re.M)
  for j in xrange(2, len(sections), 2):
    if sections[j-1] == "==Russian==\n":
      if foundrussian:
        pagemsg("WARNING: Found multiple Russian sections, skipping page")
        return
      foundrussian = True
      found_headword_template = False
      parsed = blib.parse_text(sections[j])
      for t in parsed.filter_templates():
        tname = unicode(t.name)
        if tname == "ru-adj" or (
            tname == "head" and getparam(t, "1") == "ru" and
            getparam(t, "2") == "adjective form"):
          found_headword_template = True
      if not found_headword_template and "===Adjective===" in sections[j]:
        pagemsg("WARNING: Missing adj headword template")

parser = blib.create_argparser("Find missing adjective headwords")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for cat in ["Russian adjectives", "Russian adjective forms", "Russian lemmas",
    "Russian non-lemma forms"]:
  msg("Processing category %s" % cat)
  for index, page in blib.cat_articles(cat, start, end):
    process_page(index, page)
if val != newval: pagemsg("Removing accents from 1= in {{wikipedia|...}}") notes.append("remove accents from 1= in {{wikipedia|...}}") t.add("1", newval) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Remove accents from 1= in {{wikipedia|...}}") parser.add_argument('--pagefile', help="File containing pages to fix.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) lines = [x.strip() for x in codecs.open(args.pagefile, "r", "utf-8")] for i, page in blib.iter_items(lines, start, end): process_page(i, pywikibot.Page(site, page), args.save, args.verbose)
pagemsg("Found additional named param %s" % unicode(param)) t.add("3", presstem) if direc: t.add("4", "") t.add("5", direc) blib.sort_params(t) #blib.set_param_chain(t, ppps, "past_pasv_part", "past_pasv_part") notes.append("set class-7b verb to directive %s%s" % (direc, npp and u" (no ё in present stem)" or "")) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return unicode(parsed), notes parser = blib.create_argparser(u"Fix up class-7b arguments") parser.add_argument('--direcfile', help="File containing pages to fix and directives.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) pagedirecs = [] lines = [x.strip() for x in codecs.open(args.direcfile, "r", "utf-8")] for i, line in blib.iter_items(lines, start, end): if line.startswith("#"): msg("Skipping comment: %s" % line) elif " " not in line: msg("Skipping because no space: %s" % line) elif "7b" not in line: msg("Skipping because 7b not in line: %s" % line) else: page, direc = re.split(" ", line)
continue if not g: pagemsg("WARNING: Didn't see gender: %s" % unicode(t)) continue origt = unicode(t) del t.params[:] blib.set_template_name(t, "it-noun") if head: t.add("head", head) t.add("1", g) if g2: t.add("g2", g2) t.add("2", "-") notes.append( "replace {{head|it|noun|...|invariable}} with {{it-noun|...|-}}" ) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return unicode(parsed), notes parser = blib.create_argparser( "Replace {{head|it|noun|...|invariable}} with {{it-noun|...|-}}", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
else: pagemsg("WARNING: Unrecognized param 2: %s" % origt) continue allow_2 = True if lemma: bad_param = False for param in t.params: pname = unicode(param.name) if pname.strip() == "1" or allow_2 and pname.strip() == "2": continue pagemsg("WARNING: Unrecognized param %s=%s: %s" % ( pname, param.value, origt)) bad_param = True if bad_param: continue rmparam(t, "2") t.add("1", lemma) blib.set_template_name(t, "la-part") pagemsg("Replaced %s with %s" % (origt, unicode(t))) notes.append(u"convert {{%s}} to {{la-part}}" % tn) return unicode(parsed), notes parser = blib.create_argparser(u"Convert Latin participle headwords to use {{la-part}}", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, default_cats=["Latin participles"], edit=True)
"WARNING: Can't handle %s=%s in %s: <from> %s <to> %s <end>" % (pn, unicode(param.value), origline, origline)) return origline return "{{name translit|%s|%s|%s|type=%s%s}}%s" % ( thislangcode, source_lang_code, name, name_type, "|eq=%s" % eq if eq else "", period) newsec = re.sub( r"'*(?:\{\{(?:non-gloss definition|non-gloss|ngd|n-g)\|)*A \[*transliteration\]* of the ([A-Z][a-z]*) (male given name|female given name|surname|patronymic) (\{\{[lm]\|[a-zA-Z-]*\|[^{}]*?\}\})\}*'*(\.?)'*}*", replace_name_translit, sections[j]) if newsec != sections[j]: notes.append("templatize {{name translit}} usage for lang '%s'" % thislangname) sections[j] = newsec return "".join(sections), notes parser = blib.create_argparser( "Templatize 'A transliteration of LANG name NAME' into {{name translit}}", include_pagefile=True, include_stdin=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_text_on_page, edit=True, stdin=True)
# NOTE(review): chunk begins mid-function; the header below is reconstructed
# from the do_pagefile_cats_refs call at the bottom.  pagemsg and notes come
# from the missing function prefix.
def process_page(page, index, parsed):
  text = unicode(page.text)
  parsed = blib.parse_text(text)
  for t in parsed.filter_templates():
    tn = tname(t)
    origt = unicode(t)
    # Pick which param holds the headword, depending on the template.
    param = None
    if tn in ["bg-noun", "bg-proper noun", "bg-verb", "bg-adj", "bg-adv",
        "bg-part", "bg-part form", "bg-verbal noun", "bg-verbal noun form",
        "bg-phrase"]:
      param = "1"
    elif tn == "head" and getparam(t, "1") == "bg":
      param = "head"
    if param:
      val = getparam(t, param)
      val = bglib.decompose(val)
      if GR in val:
        val = val.replace(GR, AC)
        t.add(param, val)
        notes.append("convert grave to acute in {{%s}}" % tn)
    if unicode(t) != origt:
      pagemsg("Replaced %s with %s" % (origt, unicode(t)))

  return unicode(parsed), notes

parser = blib.create_argparser("Change grave to acute in Bulgarian headwords",
  include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page,
  default_cats=["Bulgarian lemmas", "Bulgarian non-lemma forms"], edit=1)
if not mm: pagemsg("WARNING: Saw unparsable part %s, not changing: %s" % (parts[i], m.group(0))) return m.group(0) if TEMPSEP in parts[i]: pagemsg( "WARNING: Internal error: Saw Unicode FFF0 in part %s, not changing: %s" % parts[i], m.group(0)) return m.group(0) parts[i] = "{{l|pl|%s}}" % mm.group(1) notes.append("replace multipart {{l|pl|...}} with separate links") return ", ".join(parts) text = re.sub(r"\{\{l\|pl\|([^{}]*[\[\]][^{}]*)\}\}", split_links, text) return text, notes parser = blib.create_argparser( "Split {{l|pl|...}} links containing multiple entries", include_pagefile=True, include_stdin=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_text_on_page, edit=True, stdin=True)
headt = None headt = t elif tn in ["be-decl-noun", "be-decl-noun-unc", "be-decl-noun-pl"]: if not headt: pagemsg("WARNING: Encountered declension template without headword: %s" % unicode(t)) else: process_noun_headt(headt, t) headt = None elif tn == "rfinfl" and getparam(t, "1") == "be": if headt: process_noun_headt(headt) headt = None elif tn == "be-verb": process_verb_headt(t) elif tn == "be-adj": process_adj_headt(t) if headt: pagemsg("WARNING: Encountered headword template without declension: %s" % unicode(headt)) process_noun_headt(headt) return unicode(parsed), notes parser = blib.create_argparser(u"Clean up be-noun params", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, #default_refs=["Template:be-adj", "Template:be-verb", "Template:be-noun"], edit=True) default_cats=["Belarusian proper nouns", "Belarusian nouns"], edit=True)
elif getparam(t, "p"): pagemsg("WARNING: Found unexpected p=%s: %s" % (getparam(t, "p"), unicode(t))) if not re.search("[ -]", pagetitle) and (getparam(t, "f") or getparam(t, "mp") or getparam(t, "fp") or getparam(t, "p")): pagemsg("Found remaining explicit feminine or plural in single-word base form: %s" % unicode(t)) newt = unicode(t) if origt != newt: pagemsg("Replacing %s with %s" % (origt, newt)) newtext = unicode(parsed) if newtext != text: assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = newtext page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser("Remove extraneous params from {{fr-adj}}") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for cat in ["French adjectives"]: msg("Processing category: %s" % cat) for i, page in blib.cat_articles(cat, start, end): process_page(i, page, args.save, args.verbose)
def pagemsg(txt): msg("Page %s %s: %s" % (index, pagetitle, txt)) pagemsg("Processing") text = unicode(page.text) parsed = blib.parse(page) cat = do_noun and "nouns" or "proper nouns" new_text = re.sub(r"\n\n\n*\[\[Category:Russian %s]]\n\n\n*" % cat, "\n\n", text) new_text = re.sub(r"\[\[Category:Russian %s]]\n" % cat, "", new_text) return new_text, "Remove redundant [[:Category:Russian %s]]" parser = blib.create_argparser("Remove redundant 'Russian nouns' category", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) def do_process_page_do_noun_true(page, index, parsed): return process_page(page, index, do_noun=True) def do_process_page_do_noun_false(page, index, parsed): return process_page(page, index, do_noun=False) # FIXME! Won't work properly with --pagefile. blib.do_pagefile_cats_refs( args,
if new_noun_table_template != orig_noun_table_template: pagemsg("Replacing noun table %s with %s" % (orig_noun_table_template, new_noun_table_template)) new_headword_template = unicode(headword_template) if new_headword_template != orig_headword_template: pagemsg("Replacing headword %s with %s" % (orig_headword_template, new_headword_template)) if unicode(headword_template.name) == "ru-noun+": ru_noun_changed = 1 else: ru_proper_noun_changed = 1 return unicode(parsed), ru_noun_table_cleaned, ru_noun_table_link_copied, ru_noun_changed, ru_proper_noun_changed parser = blib.create_argparser("Copy the declension in ru-noun-table to ru-noun+, preserving any m=, f=, g=, etc. in the latter.") parser.add_argument('--cats', default="nouns,proper nouns", help="Categories to do ('nouns', 'proper nouns' or 'nouns,proper nouns')") parser.add_argument('--lemma-file', help="File containing lemmas to copy declension of. Will remove extraneous params from ru-noun-table and copy links to ru-noun-table regardless of this.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) if args.lemma_file: lemmas = set([x.strip() for x in codecs.open(args.lemma_file, "r", "utf-8")]) else: lemmas = None for cat in re.split(",", args.cats): if cat == "nouns": template = "Template:ru-noun+" elif cat == "proper nouns": template = "Template:ru-proper noun+"
def add_links(m): prefix = m.group(1) if re.search(u"[гкх]о$", prefix): first = prefix[:-1] + u"ий" else: first = prefix[:-1] + u"ый" return u"[[%s|%s]]-[[%s]]" % (rulib.remove_accents(first), prefix, m.group(2)) t.add("1", re.sub(u"^(.*?о)-([^-]*)$", add_links, head)) notes.append("add links to two-part adjective") newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return unicode(parsed), notes parser = blib.create_argparser("Add links to two-part adjectives", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True, default_cats=["Russian adjectives"])
rmparam(t, "1") notes.append("remove redundant 1= from {{%s}}" % name) else: pagemsg("Not removing non-redundant 1=%s" % head) check_bad_head(head, "1") newt = unicode(t) if origt != newt: pagemsg("Replacing %s with %s" % (origt, newt)) newtext = unicode(parsed) if newtext != text: assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = newtext page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser("Remove redundant head= from French terms") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) #for cat in ["French lemmas", "French non-lemma forms"]: for cat in ["French lemmas"]: msg("Processing category: %s" % cat) for i, page in blib.cat_articles(cat, start, end): process_page(i, page, args.save, args.verbose)
if tname(t) in [ "ru-conj", "ru-conj-old", "User:Benwing2/ru-conj", "User:Benwing2/ru-conj-old" ]: t.add("1", getparam(t, "1").replace("-refl", "")) elif tname(t) == "temp" and getparam(t, "1") == "ru-conj": t.add("2", getparam(t, "2").replace("-refl", "")) newt = unicode(t) if origt != newt: notes.append("remove -refl from verb type") pagemsg("Replaced %s with %s" % (origt, newt)) return parsed, notes parser = blib.create_argparser( u"Fix up verb conjugations to not specify -refl") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_edit(pywikibot.Page(site, "User:Benwing2/test-ru-verb"), 1, process_page, save=args.save, verbose=args.verbose) blib.do_edit(pywikibot.Page(site, "User:Benwing2/test-ru-verb-2"), 2, process_page, save=args.save, verbose=args.verbose) for ref in ["Template:ru-conj-old"]: msg("Processing references to: %s" % ref)
tname = unicode(t.name) if tname in ru_head_templates: headname = tname found_this_head = True elif tname == "head" and getparam(t, "1") == "ru": headtype = getparam(t, "2") headname = "head|ru|%s" % headtype if headtype in ru_heads_to_warn_about: pagemsg("WARNING: Found %s" % headname) found_this_head = True if found_this_head: cat_head_count[headname] = cat_head_count.get(headname, 0) + 1 overall_head_count[headname] = overall_head_count.get(headname, 0) + 1 found_page_head = True if not found_page_head: pagemsg("WARNING: No head") if index % 100 == 0: output_heads_seen() parser = blib.create_argparser(u"Find Russian terms without a proper headword line") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for category in ["Russian nouns", "Russian proper nouns", "Russian pronouns", "Russian determiners", "Russian adjectives", "Russian verbs", "Russian participles", "Russian adverbs", "Russian prepositions", "Russian conjunctions", "Russian interjections", "Russian idioms", "Russian phrases", "Russian abbreviations", "Russian acronyms", "Russian initialisms", "Russian noun forms", "Russian proper noun forms", "Russian pronoun forms", "Russian determiner forms", "Russian verb forms", "Russian adjective forms", "Russian participle forms"]: cat_head_count = {} msg("Processing category: %s" % category) for i, page in blib.cat_articles(category, start, end): process_page(i, page, args.save, args.verbose) output_heads_seen() output_heads_seen(overall=True)
lang = getparam(t, "1") termparam = 2 if lang != "la": #pagemsg("WARNING: Wrong language in template: %s" % unicode(t)) continue term = getparam(t, str(termparam)) alt = getparam(t, str(termparam + 1)) gloss = getparam(t, str(termparam + 2)) if alt and lalib.remove_macrons(alt) == term: origt = unicode(t) t.add(str(termparam), alt) if gloss: t.add(str(termparam + 1), "") else: rmparam(t, str(termparam + 1)) pagemsg("Replaced %s with %s" % (origt, unicode(t))) notes.append("move alt param to link param in %s" % tn) secbody = unicode(parsed) sections[j] = secbody + sectail return "".join(sections), notes parser = blib.create_argparser( "Move alt param to term param in {{l}}, {{m}}, {{alternative form of}}, {{alt form}}", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
pagemsg("WARNING: Script no longer applies and would need fixing up") return pagemsg("Processing") new_text = "#REDIRECT [[Module:ru-verb/documentation]]" comment = "redirect to [[Module:ru-verb/documentation]]" if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Redirect ru-conj-* documentation pages") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) types = [ "7a", "7b", "8a", "8b", "9a", "9b", "10a", "10c", "11a", "11b", "12a",
"Phonetic respelling %s (translit %s) in %s agrees with head translit %s, auto translit %s" % (",".join(phon_respellings), ",".join(respelling_tr), unicode(t), ",".join(head_template_tr), ",".join(head_auto_tr))) if noun_head_template and head_template_tr and not saw_ndecl: pagemsg( "WARNING: Missing declension for noun needing phonetic respelling, headtr=%s, autotr=%s: %s" % (",".join(head_template_tr), ",".join(head_auto_tr), unicode(noun_head_template))) return unicode(parsed), notes parser = blib.create_argparser( "Remove redundant translit from Hindi headwords and check translit against phonetic respelling", include_pagefile=True, include_stdin=True) parser.add_argument('--direcfile', help="File containing output from find_regex.py.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_text_on_page, default_cats=["Hindi lemmas"], edit=True, stdin=True)
if unicode(t.name) == "head" and getparam(t, "1") == "ru" and getparam(t, "2") == "verb form": found_head_verb_form = True if not found_head_verb_form or not found_inflection_of: # Find definition line foundrussian = False sections = re.split("(^==[^=]*==\n)", unicode(page.text), 0, re.M) for j in xrange(2, len(sections), 2): if sections[j-1] == "==Russian==\n": if foundrussian: pagemsg("WARNING: Found multiple Russian sections, skipping page") return foundrussian = True deflines = r"\n".join(re.findall(r"^(# .*)$", sections[j], re.M)) if not found_head_verb_form: pagemsg("WARNING: No {{head|ru|verb form}}: %s" % deflines) if not found_inflection_of: pagemsg("WARNING: No 'inflection of': %s" % deflines) parser = blib.create_argparser(u"Find badly formatted Russian verb forms") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for category in ["Russian verb forms"]: msg("Processing category: %s" % category) for i, page in blib.cat_articles(category, start, end): process_page(i, page, args.save, args.verbose)
if not re.search(r"\.\s*$", notesval): notesval = re.sub(r"(\s*)$", r".\1", notesval) t.add("footnote", notesval, before="notes", preserve_spacing=False) rmparam(t, "notes") blib.set_template_name(t, "be-adecl-manual") notes.append("convert {{be-adj-table}} to {{be-adecl-manual}}") if origt != unicode(t): pagemsg("Replaced %s with %s" % (origt, unicode(t))) return unicode(parsed), notes parser = blib.create_argparser( u"Convert old Belarusian adjective declension templates to new ones", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, default_cats=[ "Belarusian adjectives", "Belarusian pronouns", "Belarusian determiners" ], edit=True)
# NOTE(review): chunk begins mid-function; the header below is reconstructed
# from the driver call at the bottom.  pagetitle comes from the missing
# function prefix.
def process_page(index, page, save, verbose):
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  # Only strip the category when the page really has a Russian audio link.
  found_audio = False
  for t in parsed.filter_templates():
    if unicode(t.name) == "audio" and getparam(t, "lang") == "ru":
      found_audio = True
      break
  if found_audio:
    new_text = re.sub(r"\n*\[\[Category:Russian terms with audio links]]\n*",
      "\n\n", text)
    if new_text != text:
      comment = "Remove redundant [[:Category:Russian terms with audio links]]"
      if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
      else:
        pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser("Remove redundant audio-link categories")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.cat_articles("Russian terms with audio links", start, end):
  process_page(i, page, args.save, args.verbose)
# Find definition line foundrussian = False sections = re.split("(^==[^=]*==\n)", unicode(page.text), 0, re.M) for j in xrange(2, len(sections), 2): if sections[j - 1] == "==Russian==\n": if foundrussian: pagemsg( "WARNING: Found multiple Russian sections, skipping page" ) return foundrussian = True deflines = r"\n".join( re.findall(r"^(# .*)$", sections[j], re.M)) if not found_head_verb_form: pagemsg("WARNING: No {{head|ru|verb form}}: %s" % deflines) if not found_inflection_of: pagemsg("WARNING: No 'inflection of': %s" % deflines) parser = blib.create_argparser(u"Find badly formatted Russian verb forms") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for category in ["Russian verb forms"]: msg("Processing category: %s" % category) for i, page in blib.cat_articles(category, start, end): process_page(i, page, args.save, args.verbose)
notes.append("replaced {{head|fr|%s}} with {{%s}}%s" % (headtype, unicode(t.name), " (NEEDS REVIEW)" if fixed_plural_warning else "")) newtext = unicode(parsed) if newtext != text: assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = newtext blib.try_repeatedly(lambda: page.save(comment=comment), pagemsg, "save page") else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser("Convert head|fr|* to fr-*") parser.add_argument("--fix-missing-plurals", action="store_true", help="Fix cases with missing plurals by just assuming the default plural.") parser.add_argument("--lemma-file",help="File containing lemmas to do.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) if args.lemma_file: lines = [x.strip() for x in codecs.open(args.lemma_file, "r", "utf-8")] for i, pagename in blib.iter_items(lines, start, end): process_page(i, pywikibot.Page(site, pagename), args.save, args.verbose, args.fix_missing_plurals) else: for cat in ["French nouns", "French proper nouns", "French pronouns", "French determiners", "French adjectives", "French verbs", "French participles", "French adverbs", "French prepositions", "French conjunctions", "French interjections", "French idioms", "French phrases", "French abbreviations", "French acronyms", "French initialisms", "French noun forms", "French proper noun forms", "French pronoun forms", "French determiner forms", "French verb forms", "French adjective forms", "French participle forms", "French proverbs", "French prefixes", "French suffixes", "French diacritical marks", "French punctuation marks"]: #for cat in ["French adjective forms", "French participle forms", "French proverbs", "French prefixes", "French suffixes", "French diacritical marks", "French punctuation marks"]: msg("Processing category: %s" % cat) for i, page in blib.cat_articles(cat, start, end): process_page(i, page, args.save, args.verbose, 
args.fix_missing_plurals)
rmparam(headword_template, "g2") rmparam(headword_template, "g3") rmparam(headword_template, "g4") rmparam(headword_template, "g5") for gnum, g in enumerate(genders): param = "g" if gnum == 0 else "g" + str(gnum + 1) headword_template.add(param, g) pagemsg("Replacing %s with %s" % (orig_template, unicode(headword_template))) return unicode( parsed ), "Fix headword gender, substituting new value %s" % ",".join(genders) parser = blib.create_argparser( "Fix gender errors introduced by fix_ru_noun.py") parser.add_argument('--direcfile', help="File containing pages and warnings to process", required=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) # * Page 3574 [[коала]]: WARNING: Gender mismatch, existing=m-an,f-an, new=f-an lines = [x.strip() for x in codecs.open(args.pagefile, "r", "utf-8")] for i, line in blib.iter_items(lines, start, end): m = re.search( "^\* Page [0-9]+ \[\[(.*?)\]\]: WARNING: Gender mismatch, existing=(.*?), new=.*?$", line) if not m: msg("WARNING: Can't process line: %s" % line) else:
#!/usr/bin/env python # -*- coding: utf-8 -*- import pywikibot, re, sys, codecs, argparse import blib from blib import getparam, rmparam, msg, site import rulib as ru parser = blib.create_argparser(u"Delete ru-conj-* templates and documentation pages") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) msg("WARNING: Script no longer applies and would need fixing up") types = ["1a", "2a", "2b", "3oa", "3a", "3b", "3c", "4a", "4b", "4c", "5a", "5b", "5c", "6a", "6b", "6c", "7a", "7b", "8a", "8b", "9a", "9b", "10a", "10c", "11a", "11b", "12a", "12b", "13b", "14a", "14b", "14c", "15a", "16a", "16b", u"irreg-бежать", u"irreg-спать", u"irreg-хотеть", u"irreg-дать", u"irreg-есть", u"irreg-сыпать", u"irreg-лгать", u"irreg-мочь", u"irreg-слать", u"irreg-идти", u"irreg-ехать", u"irreg-минуть", u"irreg-живописать-миновать", u"irreg-лечь", u"irreg-зиждиться", u"irreg-клясть", u"irreg-слыхать-видать", u"irreg-стелить-стлать", u"irreg-быть", u"irreg-ссать-сцать", u"irreg-чтить", u"irreg-шибить", u"irreg-плескать", u"irreg-реветь", u"irreg-внимать", u"irreg-внять", u"irreg-обязывать"] for i, ty in blib.iter_items(types, start, end): template_page = pywikibot.Page(site, "Template:ru-conj-%s" % ty) if template_page.exists():
def pagemsg(txt): msg("Page %s %s: %s" % (index, lemma, txt)) def errandpagemsg(txt): errandmsg("Page %s %s: %s" % (index, lemma, txt)) def expand_text(tempcall): return blib.expand_text(tempcall, lemma, pagemsg, verbose) pagemsg("Processing") for formind, form in blib.iter_items(forms): delete_form(index, lemma, formind, form, lang, save, verbose, diff) parser = blib.create_argparser(u"Delete bad forms for inflected languages") parser.add_argument('--formfile', help="File containing lemmas and forms to delete.", required=True) parser.add_argument('--lang', help="Language ('es' or 'it').", choices=["es", "it"], required=True) parser.add_argument('--output-pages-to-delete', help="File to write pages to delete.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) pages_to_delete = [] lines = [x.strip() for x in codecs.open(args.formfile, "r", "utf-8")] for index, line in blib.iter_items(lines, start, end):
t.add("4", number) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) notes.append("converted '%s|%s' to '%s|%s'" % (number, case, case, number)) sections[j] = unicode(parsed) new_text = "".join(sections) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(blib.group_notes(notes)) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Canonicalize 'inflection of' for noun forms") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for category in ["Russian noun forms"]: msg("Processing category: %s" % category) for i, page in blib.cat_articles(category, start, end): process_page(i, page, args.save, args.verbose)
def process_page(index, page, save, verbose): pagetitle = unicode(page.title()) def pagemsg(txt): msg("Page %s %s: %s" % (index, pagetitle, txt)) pagemsg("Processing") if not re.search(ur"(ник|ок)([ -]|$)", pagetitle): return parsed = blib.parse(page) for t in parsed.filter_templates(): tname = unicode(t.name) if tname == "ru-noun-table": ut = unicode(t) if re.search(ur"ни́к(\||$)", ut) and "|b" not in ut: pagemsg("WARNING: Likely missing accent b: %s" % ut) if re.search(ur"о́к(\||$)", ut) and "*" in ut and "|b" not in ut: pagemsg("WARNING: Likely missing accent b: %s" % ut) parser = blib.create_argparser(u"Find likely missing accent b") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for category in ["Russian nouns"]: msg("Processing category: %s" % category) for i, page in blib.cat_articles(category, start, end): process_page(i, page, args.save, args.verbose)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# List the pages found in the given categories and/or in the references of
# the given pages, one "Page N TITLE: Processing" message per page.

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

parser = blib.create_argparser(u"List pages, lemmas and/or non-lemmas")
parser.add_argument('--cats', default="Russian lemmas",
  help="Categories to do (can be comma-separated list)")
parser.add_argument('--refs',
  help="References to do (can be comma-separated list)")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if args.refs:
  # --refs takes precedence over --cats when both are given.
  for ref in args.refs.split(","):
    msg("Processing references to: %s" % ref)
    for idx, pg in blib.references(ref, start, end):
      msg("Page %s %s: Processing" % (idx, unicode(pg.title())))
else:
  for cat in args.cats.split(","):
    msg("Processing category: %s" % cat)
    for idx, pg in blib.cat_articles(cat, start, end):
      msg("Page %s %s: Processing" % (idx, unicode(pg.title())))
t.add("2", "irreg/c'") notes.append( "make past stress /c' explicit in irreg verb") else: t.add("2", "irreg/c") notes.append( "make past stress /c explicit in irreg verb") elif param2 == "irreg/a": t.add("2", "irreg") notes.append("make past stress /a default in irreg verb") elif not param2.startswith("irreg/"): errpagemsg("WARNING: Unable to parse param2 %s" % param2) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return parsed, notes parser = blib.create_argparser( u"Fix up class-8 and irregular arguments to have class a as default past stress" ) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.cat_articles("Russian class 8b verbs", start, end): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose) for i, page in blib.cat_articles("Russian irregular verbs", start, end): blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
return blib.expand_text(tempcall, pagetitle, pagemsg, verbose) pagemsg("Processing") parsed = blib.parse(page) for t in parsed.filter_templates(): if unicode(t.name) in ["ru-conj", "ru-conj-old"] and getparam(t, "1").startswith("pf"): if tname == "ru-conj": tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms", unicode(t)) else: tempcall = re.sub(r"\{\{ru-conj-old", "{{ru-generate-verb-forms|old=y", unicode(t)) result = expand_text(tempcall) if not result: pagemsg("WARNING: Error generating forms, skipping") continue args = rulib.split_generate_args(result) for base in ["past_pasv_part", "ppp"]: for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]: val = getparam(t, base + i) if val and val != "-": val = re.sub("//.*", "", val) pagemsg("Found perfective past passive participle: %s" % val) parser = blib.create_argparser(u"Find Russian perfective verbs with explicit past passive participles") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for category in ["Russian verbs"]: for i, page in blib.cat_articles(category, start, end): process_page(i, page, args.save, args.verbose)
forms_seen.add(form_no_macrons) slots_and_forms_to_process.append((slot, form)) for formindex, (slot, form) in blib.iter_items( sorted(slots_and_forms_to_process, key=lambda x: lalib.remove_macrons(x[1]))): def handler(page, formindex, parsed): return process_form(page, formindex, slot, form, pos, pagemsg) blib.do_edit(pywikibot.Page(site, lalib.remove_macrons(form)), "%s.%s" % (index, formindex), handler, save=args.save, verbose=args.verbose, diff=args.diff) parser = blib.create_argparser( u"Correct headers/headwords of non-lemma forms with the wrong part of speech", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs( args, start, end, process_page, default_cats=["Latin participles", "Latin proper nouns"])
rmparam(headword_template, "g4") rmparam(headword_template, "g5") for gnum, g in enumerate(genders): param = "g" if gnum == 0 else "g" + str(gnum+1) headword_template.add(param, g) pagemsg("Replacing %s with %s" % (orig_template, unicode(headword_template))) comment = "Fix headword gender, substituting new value %s" % ",".join(genders) if save: pagemsg("Saving with comment = %s" % comment) page.text = unicode(parsed) page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser("Fix gender errors introduced by fix_ru_noun.py") parser.add_argument('--pagefile', help="File containing pages and warnings to process") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) # * Page 3574 [[коала]]: WARNING: Gender mismatch, existing=m-an,f-an, new=f-an lines = [x.strip() for x in codecs.open(args.pagefile, "r", "utf-8")] for i, line in blib.iter_items(lines, start, end): m = re.search("^\* Page [0-9]+ \[\[(.*?)\]\]: WARNING: Gender mismatch, existing=(.*?), new=.*?$", line) if not m: msg("WARNING: Can't process line: %s" % line) else: page, genders = m.groups() msg("Page %s %s: Processing: %s" % (i, page, line)) process_page(i, pywikibot.Page(site, page), args.save, args.verbose, re.split(",", genders))
def pagemsg(txt): msg("Page %s %s: %s" % (index, pagetitle, txt)) pagemsg("Processing") parsed = blib.parse(page) found_headword_template = False for t in parsed.filter_templates(): if unicode(t.name) in ["ru-adj"]: found_headword_template = True if not found_headword_template: notes = [] for t in parsed.filter_templates(): if unicode(t.name) in [ "ru-noun", "ru-noun+", "ru-proper noun", "ru-proper noun+" ]: notes.append("found noun header (%s)" % unicode(t.name)) if unicode(t.name) == "head": notes.append("found head header (%s)" % getparam(t, "2")) pagemsg("Missing adj headword template%s" % (notes and "; " + ",".join(notes))) parser = blib.create_argparser("Find missing adjective headwords") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for index, page in blib.references("Template:ru-decl-adj", start, end): process_page(index, page)
msg("Would remove past overrides and add arg5=b") else: msg("WARNING: Remaining past overrides: past_m=%s, past_f=%s, past_n=%s, past_pl=%s, expected_past_m=%s, expected_past_f=%s, expected_past_n=%s, expected_past_pl=%s" % (past_m, past_f, past_n, past_pl, expected_past_m, expected_past_f, expected_past_n, expected_past_pl)) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Convert class-7 past overrides to past stress pattern") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for category in ["Russian class 7 verbs"]: msg("Processing category: %s" % category) for i, page in blib.cat_articles(category, start, end): process_page(i, page, args.save, args.verbose)
"Not adding alt=%s because it's the same as the term" % t_alt) else: t.add("alt", t_alt) if t_tr: t.add("tr", t_tr) if t_sort: t.add("sort", t_sort) if t_sc: t.add("sc", t_sc) notes.append("convert {{%s}} to {{auto cat}}" % tn) if unicode(t) != origt: pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t))) return unicode(parsed), notes parser = blib.create_argparser("Convert affix cat usages to {{auto cat}}", include_pagefile=True, include_stdin=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_text_on_page, edit=True, stdin=True)
if origtext != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (origtext, text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) elif warn_on_no_change: pagemsg("WARNING: No changes") parser = blib.create_argparser(u"Fix indentation of Pronunciation, Declension, Conjugation, Alternative forms sections") parser.add_argument("--pagefile", help="""List of pages to process.""") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) if args.pagefile: lines = [x.strip() for x in codecs.open(args.pagefile, "r", "utf-8")] for i, line in blib.iter_items(lines, start, end): m = re.search("^Page [0-9]+ (.*?): WARNING: .*?$", line) if not m: msg("WARNING: Can't process line: %s" % line) else: page = m.group(1) process_page(i, pywikibot.Page(site, page), args.save, args.verbose, warn_on_no_change=True)
for t in parsed.filter_templates(): origt = unicode(t) tn = tname(t) if tn == "la-ndecl": num_ndecl_templates += 1 lemmaspec = getparam(t, "1") m = re.search("^(.*)<(.*)>$", lemmaspec) if not m: pagemsg("WARNING: Unable to parse lemma+spec %s, skipping: %s" % ( lemmaspec, origt)) continue lemma, spec = m.groups() if ".-ium" not in spec: spec += ".-ium" t.add("1", "%s<%s>" % (lemma, spec)) pagemsg("Replaced %s with %s" % (origt, unicode(t))) notes.append("add .-ium to declension of Latin chemical element") if num_ndecl_templates > 1: pagemsg("WARNING: Saw multiple {{la-ndecl}} templates, some may not be elements") return None, None return unicode(parsed), notes parser = blib.create_argparser("Add missing .-ium to Latin elements", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, default_cats=["la:Chemical elements"], edit=True)
notes.append("moving past_m %s to arg 3" % past_m) else: pagemsg("Stem %s and past_m %s are different, putting past_m in param 5" % ( stem, past_m)) t.add("5", past_m) notes.append("moving past_m %s to arg 5" % past_m) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Fix up class-8 arguments") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.cat_articles("Russian class 8 verbs", start, end): process_page(i, page, args.save, args.verbose)
notes.append( "note transitive unpaired imperfective verb as lacking past passive participle" ) pagemsg("Note no PPP, replace %s with %s" % (origt, unicode(t))) elif direc == "paired": pagemsg("Verb actually is paired") elif direc == "fixed": pagemsg("WARNING: Unfixed verb marked as fixed") elif direc == "intrans": pagemsg("WARNING: Transitive verb marked as intrans") return unicode(parsed), notes parser = blib.create_argparser( u"Find verbs with missing past passive participles") parser.add_argument('--fix-pagefile', help="File containing pages to fix.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) if args.fix_pagefile: fixdireclines = [ x.strip() for x in codecs.open(args.fix_pagefile, "r", "utf-8") ] fixdirecs = {} fixpages = [] for line in fixdireclines: verb, direc = re.split(" ", line) fixdirecs[verb] = direc fixpages.append(verb)
if verbtype == "pf-impers-refl": t.add("1", "pf-refl-impers") notes.append("pf-impers-refl -> pf-refl-impers") if verbtype == "impf-impers-refl": t.add("1", "impf-refl-impers") notes.append("impf-impers-refl -> impf-refl-impers") newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Change verb type *-impers-refl to *-refl-impers") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for i, page in blib.cat_articles("Russian verbs", start, end): process_page(i, page, args.save, args.verbose)
elif re.search(r"^'*optional'*$", para2): opt = True para2 = None origt = unicode(t) t.add("1", para1) if para2: t.add("2", "") t.add("3", para2) else: rmparam(t, "2") if req: t.add("req", "1") if opt: t.add("opt", "1") blib.set_template_name(t, "para") pagemsg("Replaced %s with %s" % (origt, unicode(t))) if para2: pagemsg("Set additional info param 3=%s in %s" % (para2, unicode(t))) notes.append(u"convert {{docparam}} to {{para}}") return unicode(parsed), notes parser = blib.create_argparser("Deprecate {{docparam}} in favor of {{para}}", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
return if text != newtext: if verbose: pagemsg("Replacing <<%s>> with <<%s>>" % (text, newtext)) comment = "Replace raw links with templated links: %s" % ",".join(subbed_links) if save: pagemsg("Saving with comment = %s" % comment) page.text = newtext page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) if __name__ == "__main__": parser = blib.create_argparser("Replace raw links with templated links") parser.add_argument('--lang', help="Language code for language to do") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) if not args.lang: raise ValueError("Language code must be specified") if args.lang not in languages: raise ValueError("Unrecognized language code: %s" % args.lang) thislangcode = args.lang thislangname, this_remove_accents, this_charset, this_ignore_translit = ( languages[thislangcode]) for category in ["%s lemmas" % thislangname, "%s non-lemma forms" % thislangname]: msg("Processing category: %s" % category) for i, page in blib.cat_articles(category, start, end):
0, re.M) if new_secbody != secbody: notes.append( "remove bad Chinese links (see [[Wiktionary:Grease pit/2019/September#Requesting bot help]])" ) secbody = new_secbody subsections = re.split("(^==+[^=\n]+==+\n)", secbody, 0, re.M) subsections_to_delete = [] for k in xrange(1, len(subsections), 2): if (subsections[k] in ["===References===\n", "====References====\n"] and not subsections[k + 1].strip()): subsections_to_delete.append(k) if subsections_to_delete: for k in reversed(subsections_to_delete): del subsections[k:k + 2] notes.append("remove empty References section") secbody = "".join(subsections) sections[j] = secbody.rstrip("\n") + secbody_finalnl + sectail return "".join(sections), notes parser = blib.create_argparser( "Remove bad Chinese references and resulting empty References section", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
process_arg_set(arg_set) arg_set = [] else: arg_set.append(val) for t in parsed.filter_templates(): tname = unicode(t.name) if tname == "ru-decl-noun-see": pagemsg("WARNING: Skipping ru-decl-noun-see, can't handle yet: %s" % unicode(t)) elif tname in ["ru-noun+", "ru-proper noun+"]: pagemsg("Found %s" % unicode(t)) process_new_style_headword(t) elif tname in ["ru-noun", "ru-proper noun"]: pagemsg("WARNING: Skipping ru-noun or ru-proper noun, can't handle yet: %s" % unicode(t)) parser = blib.create_argparser(u"Find red links in multiword lemmas") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) msg("Reading Russian lemmas") for i, page in blib.cat_articles("Russian lemmas", start, end): lemmas.add(unicode(page.title())) for pos in ["nouns", "proper nouns"]: tracking_page = "Template:tracking/ru-headword/space-in-headword/" + pos msg("PROCESSING REFERENCES TO: %s" % tracking_page) for index, page in blib.references(tracking_page, start, end): process_page(index, page, args.verbose) for lemma, nonexistent_msg in sorted(nonexistent_lemmas.items(), key=lambda pair:(-lemma_count[pair[0]], pair[0])): msg("* [[%s]] (%s occurrence%s): %s (refs: %s)" % (lemma, lemma_count[lemma],
pagemsg("Existing text for form %s: [[%s]]" % ( formpagename, text)) if save: formpage.delete(comment) else: pagemsg("Would delete page %s with comment=%s" % (formpagename, comment)) notes.append("fix 3olda -> %s" % direc) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return unicode(parsed), notes parser = blib.create_argparser("Fix up class 3a") parser.add_argument('--direcfile', help="File containing pages to fix and directives.") parser.add_argument('--delete-bad', action="store_true", help="Delete bad forms.") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) pagedirecs = [] lines = [x.strip() for x in codecs.open(args.direcfile, "r", "utf-8")] for i, line in blib.iter_items(lines, start, end): if line.startswith("#"): msg("Skipping comment: %s" % line) else: page, direc = re.split(" ", line) def do_process_page(page, index, parsed): return process_page(index, page, direc, args.delete_bad, args.verbose) blib.do_edit(pywikibot.Page(site, page), i, do_process_page, save=args.save,
# but it's the default in ru-noun-table unless the lemma is plural. # So remove n=both, generate the arguments, and see if the actual # value of args.n is b (for "both"); if not, set n=both. else: assert headword_n == "b" rmparam(see_template, "n") see_generate_template = re.sub(r"^\{\{ru-noun-table", "{{ru-generate-noun-args", unicode(see_template)) see_generate_result = expand_text(see_generate_template) if not see_generate_result: pagemsg("WARNING: Error generating ru-noun-table args") return None see_args = ru.split_generate_args(see_generate_result) if see_args["n"] != "b": see_template.add("n", "both") comment = "Replace ru-decl-noun-see with ru-noun-table, taken from headword template (%s)" % unicode(headword_template.name) if save: pagemsg("Saving with comment = %s" % comment) page.text = unicode(parsed) page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser("Convert ru-decl-noun-see into ru-noun-table decl template, taken from headword ru-(proper )noun+ template") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for index, page in blib.references("Template:ru-decl-noun-see", start, end): process_page(index, page, args.save, args.verbose)
if g not in genders: pagemsg("WARNING: Saw decl gender %s that disagrees with headword gender(s) %s: headt=%s, declt=%s" % ( g, ",".join(genders), unicode(headt), unicode(t))) continue blib.set_template_name(t, "sa-decl-noun-%s" % g) rmparam(t, "n") rmparam(t, "4") rmparam(t, "3") rmparam(t, "2") t.add("1", tr) notes.append("convert {{%s}} to {{sa-decl-noun-%s}}" % (tn, g)) else: pagemsg("WARNING: Saw unrecognized decl template: %s" % unicode(t)) if origt != unicode(t): pagemsg("Replaced %s with %s" % (origt, unicode(t))) if headt: pagemsg("WARNING: Saw {{sa-noun}} without {{sa-decl-noun-*}}: %s" % unicode(headt)) return unicode(parsed), notes parser = blib.create_argparser("Convert old {{sa-decl-noun-*}} templates to new ones", include_pagefile=True, include_stdin=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_text_on_page, edit=True, stdin=True, default_cats=["Sanskrit nouns"])
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Purge (null-save) pages in a category or in the references of a page.
# A null save forces the wiki software to re-render and re-categorize
# each page without changing its text.

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

parser = blib.create_argparser(u"Purge (null-save) pages in category or references")
parser.add_argument('--cat', help="Category to purge")
parser.add_argument('--ref', help="References to purge")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Fail fast with a clear message rather than passing None to blib.references().
if not args.cat and not args.ref:
  raise ValueError("One of --cat or --ref must be specified")

if args.cat:
  pages_to_list = blib.cat_articles(args.cat, start, end)
else:
  pages_to_list = blib.references(args.ref, start, end)

for i, page in pages_to_list:
  page.save(comment="null save")
errandpagemsg("WARNING: Already found %s section" % lang) return if foundlang > lang: insert_before = j - 1 break if insert_before == 0: # Add to the end newtext = curtext.rstrip("\n") + "\n\n----\n\n" + contents return newtext, comment sections[insert_before:insert_before] = contents.rstrip( "\n") + "\n\n----\n\n" return "".join(sections), comment if __name__ == "__main__": parser = blib.create_argparser("Push new entries from generate_entries.py") parser.add_argument('--direcfile', help="File containing entries.") parser.add_argument('--comment', help="Comment to use.", required="true") parser.add_argument('--lang', help="Language of entries.", required="true") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) lines = codecs.open(args.direcfile, "r", "utf-8") index_pagename_and_text = blib.yield_text_from_find_regex( lines, args.verbose) for _, (index, pagename, text) in blib.iter_items(index_pagename_and_text, start, end, get_name=lambda x: x[1],
oldt = unicode(t) del t.params[:] t.name = "fr-conj-auto" if refl: t.add("refl", "yes") if aux: t.add("aux", aux) newt = unicode(t) pagemsg("Replacing %s with %s" % (oldt, newt)) notes.append("replaced {{%s}} with %s" % (name, newt)) newtext = unicode(parsed) if newtext != text: assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = newtext page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser("Convert old fr-conj-* to fr-conj-auto") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for cat in ["French verbs"]: msg("Processing category: %s" % cat) for i, page in blib.cat_articles(cat, start, end): process_page(i, page, args.save, args.verbose)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Check each verb listed in --verbfile (one page title per line) and report
# whether its page contains an impersonal conjugation.

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

parser = blib.create_argparser(u"Find verbs with impersonal conjugations")
parser.add_argument('--verbfile', help="File listing verbs to check.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, line in blib.iter_items(codecs.open(args.verbfile, "r", "utf-8"), start, end):
  verb = line.strip()
  if not verb:
    # Skip blank lines: pywikibot.Page would raise on an empty title.
    continue
  page = pywikibot.Page(site, verb)
  # "-impers|" appears in the conjugation-template call of impersonal verbs.
  if "-impers|" in page.text:
    msg("Page %s %s: Found impersonal conjugation" % (i, unicode(page.title())))
  else:
    msg("Page %s %s: No impersonal conjugation" % (i, unicode(page.title())))
(",".join(manual_ppps), ",".join(auto_ppps), unicode(t))) else: # no break in for loop for m in notsamemsgs: pagemsg(m) newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) new_text = unicode(parsed) if new_text != text: if verbose: pagemsg("Replacing <%s> with <%s>" % (text, new_text)) assert notes comment = "; ".join(notes) if save: pagemsg("Saving with comment = %s" % comment) page.text = new_text page.save(comment=comment) else: pagemsg("Would save with comment = %s" % comment) parser = blib.create_argparser(u"Infer the past passive participle variant from the actual PPP") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) for category in ["Russian verbs"]: for i, page in blib.cat_articles(category, start, end): process_page(i, page, args.save, args.verbose)
found_headword_template = True if getparam(t, "3") == "-": found_invariant_headword_template = True else: headword_templates.append(unicode(t)) if unicode(t.name) in ["ru-noun-table", "ru-decl-noun-see"]: found_decl_template = True if found_headword_template and not found_invariant_headword_template: if found_decl_template: pagemsg("Found old-style headword template(s) %s with decl" % ", ".join(headword_templates)) else: pagemsg("Found old-style headword template(s) %s without decl" % ", ".join(headword_templates)) parser = blib.create_argparser("Find Russian nouns without declension") args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) #for pos in ["nouns", "proper nouns"]: # Do multi-word nouns # tracking_page = "Template:tracking/ru-headword/space-in-headword/" + pos # msg("Processing references to %s" % tracking_page) # for index, page in blib.references(tracking_page, start, end): # process_page(index, page) # Do all nouns with {{ru-noun}} or {{ru-proper noun}} for template in ["ru-noun", "ru-proper noun"]: for index, page in blib.references("Template:%s" % template, start, end): process_page(index, page)