subtypes = [x for x in subtypes if x != "N" and x != "I"] if "pure" in subtypes: subtypes = [x for x in subtypes if x != "pure"] else: subtypes = subtypes + ["-pure"] elif "N" in subtypes: newlemma = stem2 + "a" subtypes = [x for x in subtypes if x != "N"] else: newlemma = stem2 + u"ēs" subtypes = [x for x in subtypes if x != "-I"] newspec = ".".join([decl] + subtypes) t.add("1", "%s<%s>" % (newlemma, newspec)) pagemsg("Replaced %s with %s" % (origt, unicode(t))) notes.append("convert 3rd-declension plural term to have plural lemma in {{la-ndecl}}") break if not compare_new_and_old_templates(origt, unicode(t), pagetitle, pagemsg, errandpagemsg): bad_compare = True if bad_compare: return None, None return unicode(parsed), notes parser = blib.create_argparser("Fix Latin 3rd-decl plural nouns to specify plural lemma, and check new against old {{la-ndecl}} code", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True, default_refs=["Template:la-ndecl", "Template:la-adecl"])
pagemsg("Processing") text = unicode(page.text) parsed = blib.parse(page) found_audio = False for t in parsed.filter_templates(): if unicode(t.name) == "audio" and getparam(t, "lang") == "ru": found_audio = True break if found_audio: new_text = re.sub( r"\n*\[\[Category:Russian terms with audio links]]\n*", "\n\n", text) if new_text != text: return new_text, "Remove redundant [[:Category:Russian terms with audio links]]" parser = blib.create_argparser("Remove redundant audio-link categories", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True, default_cats=["Russian terms with audio links"])
sections = re.split("(^==[^=]*==\n)", text, 0, re.M) for j in xrange(2, len(sections), 2): m = re.search("^==(.*)==\n$", sections[j - 1]) assert m langname = m.group(1) if langname not in blib.languages_byCanonicalName: pagemsg("WARNING: Can't find language %s" % langname) continue langcode = blib.languages_byCanonicalName[langname]["code"] sections[j] = re.sub(r"\bLANGCODE\b", langcode, sections[j]) notes.append("replace LANGCODE with %s" % langcode) newtext = "".join(sections) return newtext, notes parser = blib.create_argparser( "Replace LANGCODE with appropriate language code", include_pagefile=True, include_stdin=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_text_on_page, edit=True, stdin=True)
frobbed_pronuns = [] if specified_pronuns: notes.append( "remove explicitly specified pronun in {{it-IPA}} because same as page title" ) blib.set_param_chain(t, frobbed_pronuns, "1", "") if t.has("voiced"): rmparam(t, "voiced") notes.append("remove voiced= in {{it-IPA}}") if origt != unicode(t): pagemsg("Replaced %s with %s" % (origt, unicode(t))) return unicode(parsed), notes parser = blib.create_argparser( "Add missing stress and z resolution to {{it-IPA}}", include_pagefile=True, include_stdin=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_text_on_page, edit=True, stdin=True, default_refs=["Template:it-IPA"])
origt = unicode(t) t.add("2", "+", before="a") notes.append("add adjectival + to %s" % name) pagemsg("Replacing %s with %s" % (origt, unicode(t))) existing_fem = getparam(t, "f") if existing_fem: if new_fem != existing_fem: pagemsg( "WARNING: New feminine %s different from existing feminine %s, not changing: %s" % (new_fem, existing_fem, unicode(t))) else: origt = unicode(t) t.add("f", new_fem) notes.append("add feminine %s to %s" % (new_fem, name)) pagemsg("Replacing %s with %s" % (origt, unicode(t))) return unicode(parsed), notes parser = blib.create_argparser("Add feminines to Russian proper names", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True, default_cats=["Russian surnames"])
if unicode(t.name) in [ "bor", "borrowing" ] and (getparam(t, "lang") == "ru" or not getparam(t, "lang") and getparam(t, "1") == "ru"): found_borrowing = True pagemsg("Already contains borrowing: %s" % m.group(0)) if not found_borrowing: pagemsg("WARNING: Can't find proper borrowing template") return text, "Use {{inh}}/{{bor}} in Russian for terms inherited or borrowed" parser = blib.create_argparser( "Use {{inh}} and {{bor}} where possible in Russian", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs( args, start, end, process_page, edit=True, default_cats=["Russian lemmas", "Russian non-lemma forms"]) msg("") msg("Processed borrowed languages:") for lang, count in sorted(borrowed_langs.items(), key=lambda x: -int(x[1])): msg("%s = %s" % (lang, count))
pagemsg("Processing") head = None for t in parsed.filter_templates(): origt = unicode(t) tn = tname(t) if tn == "be-decl-noun": t.name = "be-decl-noun\n" for i in [2, 4, 6, 8, 10, 12]: val = getparam(t, str(i)).strip() if val: t.add(str(i), val + "\n", preserve_spacing=False) if origt != unicode(t): pagemsg("Replaced %s with %s" % (origt, unicode(t))) notes.append("format {{be-decl-noun}} using newlines") return unicode(parsed), notes parser = blib.create_argparser(u"Format be-decl-noun using newlines", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, default_refs=["Template:be-decl-noun"], edit=True)
pagemsg("Processing") text = unicode(page.text) parsed = blib.parse(page) notes = [] for t in parsed.filter_templates(): if unicode(t.name) == "ru-ux": origt = unicode(t) if t.has("adj"): pagemsg("Removing adj=") notes.append("remove adj= from ru-ux") rmparam(t, "adj") if t.has("shto"): pagemsg("Removing shto=") notes.append("remove shto= from ru-ux") rmparam(t, "shto") newt = unicode(t) if origt != newt: pagemsg("Replaced %s with %s" % (origt, newt)) return unicode(parsed), notes parser = blib.create_argparser("Remove adj= and shto= from ru-ux", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True, default_refs=["Template:ru-ux"])
if "==Alternative forms==" in secbody: pagemsg("WARNING: Skipping page with 'Alternative forms' section") return parsed = blib.parse_text(secbody) for t in parsed.filter_templates(): origt = unicode(t) tn = tname(t) if tn in ["compound", "affix", "af"] and getparam( t, "1") == "hu" and not getparam(t, "pos"): t.add("pos", "noun") if origt != unicode(t): pagemsg("Replaced %s with %s" % (origt, unicode(t))) notes.append("add pos=noun to {{%s|hu}}" % tn) sections[j] = unicode(parsed) + sectail text = "".join(sections) return text, notes parser = blib.create_argparser(u"Add pos=noun to Hungarian compound words", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, default_cats=["Hungarian compound words"], edit=True)
if tn == "bg-IPA": if not getparam(t, "old"): continue pron = getparam(t, "1") if pron: pron = decompose_bulgarian(pron) pron = pron.replace(AC, SUB) pron = pron.replace(GR, AC) pron = pron.replace(SUB, GR) t.add("1", pron) rmparam(t, "old") notes.append( "convert {{bg-IPA}} pronunciation to new style (flip acute and grave) and remove old=1" ) if unicode(t) != origt: pagemsg("Replaced %s with %s" % (origt, unicode(t))) return parsed, notes parser = blib.create_argparser("Fix {{bg-IPA}} to new format", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, default_refs=["Template:bg-IPA"], edit=1)
if getparam(t, "3") == "superlative of": base_lemma = getparam(t, "4") rmparam(t, "head") rmparam(t, "4") rmparam(t, "3") t.add("1", lemma) t.add("2", base_lemma) blib.set_template_name(t, "la-adj-sup") pagemsg("Replaced %s with %s" % (origt, unicode(t))) notes.append("Use {{la-adj-sup}} instead of {{head|la|...}}") else: pagemsg( "WARNING: Head template doesn't include base form: %s" % unicode(t)) return unicode(parsed), notes parser = blib.create_argparser( "Fix Latin superlatives formatted using {{head|la|...}}", include_pagefile=True) args = parser.parse_args() start, end = blib.parse_start_end(args.start, args.end) blib.do_pagefile_cats_refs(args, start, end, process_page, default_cats=["Latin adjective superlative forms"], edit=True)
def process_page(page, index):
    """Record the page's title under each of its leading prefixes.

    For every prefix length from 1 up to ``args.max_prefix_length`` that the
    title is long enough to supply, the full title is appended to
    ``prefixes_by_length[length][prefix]``.

    Args:
        page: Object whose ``title()`` result is converted with ``unicode``.
        index: Not used by this function.

    Returns:
        None; the only effect is the update of ``prefixes_by_length``.
    """
    title = unicode(page.title())
    # Clamping the upper bound replaces a per-iteration "is the title long
    # enough?" check: prefixes longer than the title are never recorded.
    longest = min(len(title), args.max_prefix_length)
    for length in xrange(1, longest + 1):
        # presumably prefixes_by_length auto-creates missing entries (e.g. a
        # nested defaultdict of lists) -- TODO confirm where it is defined.
        prefixes_by_length[length][title[:length]].append(title)


# Command-line driver: parse the arguments, run process_page over the
# selected pages, then report the collected prefixes.
# NOTE(review): the description string mentions Italian pronunciations, but
# nothing in this block is pronunciation-specific -- confirm it is not a
# copy-paste leftover.
parser = blib.create_argparser("Snarf Italian pronunciations for fixing",
                               include_pagefile=True)
parser.add_argument("--max-prefix-length", type=int, default=10,
                    help="Maximum length of prefixes to check for")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page)

# One report section per prefix length, prefixes with the most titles first.
for length in xrange(1, args.max_prefix_length + 1):
    ranked = sorted(prefixes_by_length[length].iteritems(),
                    key=lambda entry: -len(entry[1]))
    # Row layout: 5-wide title count, prefix right-aligned to `length`
    # columns, then the comma-joined titles.
    row_format = "%%5d %%%ds %%s" % length
    msg("Prefix length = %s" % length)
    msg("------------------- begin -----------------------")
    for prefix, titles in ranked:
        msg(row_format % (len(titles), prefix, ",".join(titles)))
    msg("------------------- end -----------------------")