    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []

    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn in ["diminutive of", "dim of"]:
            if t.has("pos"):
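                # Singularize the value by stripping a trailing "s", e.g. "nouns" -> "noun".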
                pos = re.sub("s$", "", getparam(t, "pos"))
                t.add("POS", pos, before="pos")
                rmparam(t, "pos")
                notes.append("Convert plural pos= to singular POS= in {{%s}}" %
                             tn)

        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))

    return unicode(parsed), notes


parser = blib.create_argparser(
    "Convert plural pos= to singular POS= in {{diminutive of}}",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
Example #2
        if val:
          seenval = True
        if seenval:
          t.add(str(i + 1), val)
      t.add("1", conjtype)
      blib.sort_params(t)
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser(u"Convert ru-conj-* to ru-conj and move variant")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.cat_articles("Russian verbs", start, end):
  process_page(i, page, args.save, args.verbose)
Example #3
      if origt != newt:
        pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(blib.group_notes(notes))
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser(u"Convert Japanese headwords from old-style to new-style")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

romaji_to_keep = set()
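# Gather titles of romaji entries flagged as needing attention; they are passed to process_page() as romaji_to_keep.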
for i, page in blib.cat_articles("Japanese terms with romaji needing attention"):
  pagetitle = unicode(page.title())
  romaji_to_keep.add(pagetitle)

for ref in ["ja-noun", "ja-adj", "ja-verb", "ja-pos"]:
  msg("Processing references to Template:%s" % ref)
  for i, page in blib.references("Template:%s" % ref, start, end):
    process_page(i, page, args.save, args.verbose, romaji_to_keep)
Example #4
#    find_rfdef.py is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Find pages that need definitions among a set list (e.g. most frequent words).

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

parser = blib.create_argparser(u"Find pages that need definitions")
parser.add_argument("--pagefile", help="File containing pages to check")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

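# Read the list of pages to check (one title per line) into a set for fast lookup.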
lines = set([x.strip() for x in codecs.open(args.pagefile, "r", "utf-8")])
for i, page in blib.cat_articles("Russian entries needing definition", start, end):
    pagetitle = page.title()
    if pagetitle in lines:
        msg("* Page %s [[%s]]" % (i, pagetitle))
          "impf", "impf-intr", "impf-refl",
          "impf-impers", "impf-intr-impers", "impf-refl-impers"]:
        conjtype = getparam(t, "1")
        t.add("2", conjtype)
        t.add("1", verbtype)
        notes.append("move verb type from arg 2 to arg 1")
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser(u"Move verb type from arg 2 to arg 1")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.cat_articles("Russian verbs", start, end):
  process_page(i, page, args.save, args.verbose)
  text = unicode(page.text)

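  # Split the page into level-2 (language) sections; the capturing group keeps the headers at odd indices.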
  foundrussian = False
  sections = re.split("(^==[^=]*==\n)", text, 0, re.M)

  for j in xrange(2, len(sections), 2):
    if sections[j-1] == "==Russian==\n":
      if foundrussian:
        pagemsg("WARNING: Found multiple Russian sections, skipping page")
        return
      foundrussian = True

      found_headword_template = False
      parsed = blib.parse_text(sections[j])
      for t in parsed.filter_templates():
        tname = unicode(t.name)
        if tname == "ru-adj" or (tname == "head" and getparam(t, "1") == "ru" and getparam(t, "2") == "adjective form"):
          found_headword_template = True
      if not found_headword_template and "===Adjective===" in sections[j]:
        pagemsg("WARNING: Missing adj headword template")

parser = blib.create_argparser("Find missing adjective headwords")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for cat in ["Russian adjectives", "Russian adjective forms", "Russian lemmas", "Russian non-lemma forms"]:
  msg("Processing category %s" % cat)
  for index, page in blib.cat_articles(cat, start, end):
    process_page(index, page)
Example #7
      if val != newval:
        pagemsg("Removing accents from 1= in {{wikipedia|...}}")
        notes.append("remove accents from 1= in {{wikipedia|...}}")
        t.add("1", newval)
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser(u"Remove accents from 1= in {{wikipedia|...}}")
parser.add_argument('--pagefile', help="File containing pages to fix.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

lines = [x.strip() for x in codecs.open(args.pagefile, "r", "utf-8")]
for i, page in blib.iter_items(lines, start, end):
  process_page(i, pywikibot.Page(site, page), args.save, args.verbose)
Example #8
          pagemsg("Found additional named param %s" % unicode(param))
      t.add("3", presstem)
      if direc:
        t.add("4", "")
        t.add("5", direc)
      blib.sort_params(t)
      #blib.set_param_chain(t, ppps, "past_pasv_part", "past_pasv_part")
      notes.append("set class-7b verb to directive %s%s" %
          (direc, npp and u" (no ё in present stem)" or ""))
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  return unicode(parsed), notes

parser = blib.create_argparser(u"Fix up class-7b arguments")
parser.add_argument('--direcfile', help="File containing pages to fix and directives.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

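# Each line of --direcfile should be "PAGE DIRECTIVE"; comment lines and lines without a space or without "7b" are skipped.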
pagedirecs = []
lines = [x.strip() for x in codecs.open(args.direcfile, "r", "utf-8")]
for i, line in blib.iter_items(lines, start, end):
  if line.startswith("#"):
    msg("Skipping comment: %s" % line)
  elif " " not in line:
    msg("Skipping because no space: %s" % line)
  elif "7b" not in line:
    msg("Skipping because 7b not in line: %s" % line)
  else:
    page, direc = re.split(" ", line)
                continue
            if not g:
                pagemsg("WARNING: Didn't see gender: %s" % unicode(t))
                continue
            origt = unicode(t)
            del t.params[:]
            blib.set_template_name(t, "it-noun")
            if head:
                t.add("head", head)
            t.add("1", g)
            if g2:
                t.add("g2", g2)
            t.add("2", "-")
            notes.append(
                "replace {{head|it|noun|...|invariable}} with {{it-noun|...|-}}"
            )
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    return unicode(parsed), notes


parser = blib.create_argparser(
    "Replace {{head|it|noun|...|invariable}} with {{it-noun|...|-}}",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
      else:
        pagemsg("WARNING: Unrecognized param 2: %s" % origt)
        continue
      allow_2 = True
    if lemma:
      bad_param = False
      for param in t.params:
        pname = unicode(param.name)
        if pname.strip() == "1" or allow_2 and pname.strip() == "2":
          continue
        pagemsg("WARNING: Unrecognized param %s=%s: %s" % (
          pname, param.value, origt))
        bad_param = True
      if bad_param:
        continue
      rmparam(t, "2")
      t.add("1", lemma)
      blib.set_template_name(t, "la-part")
      pagemsg("Replaced %s with %s" % (origt, unicode(t)))
      notes.append(u"convert {{%s}} to {{la-part}}" % tn)

  return unicode(parsed), notes

parser = blib.create_argparser(u"Convert Latin participle headwords to use {{la-part}}",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page,
  default_cats=["Latin participles"], edit=True)
Example #11
                        "WARNING: Can't handle %s=%s in %s: <from> %s <to> %s <end>"
                        % (pn, unicode(param.value), origline, origline))
                    return origline
            return "{{name translit|%s|%s|%s|type=%s%s}}%s" % (
                thislangcode, source_lang_code, name, name_type,
                "|eq=%s" % eq if eq else "", period)

        newsec = re.sub(
            r"'*(?:\{\{(?:non-gloss definition|non-gloss|ngd|n-g)\|)*A \[*transliteration\]* of the ([A-Z][a-z]*) (male given name|female given name|surname|patronymic) (\{\{[lm]\|[a-zA-Z-]*\|[^{}]*?\}\})\}*'*(\.?)'*}*",
            replace_name_translit, sections[j])
        if newsec != sections[j]:
            notes.append("templatize {{name translit}} usage for lang '%s'" %
                         thislangname)
            sections[j] = newsec
    return "".join(sections), notes


parser = blib.create_argparser(
    "Templatize 'A transliteration of LANG name NAME' into {{name translit}}",
    include_pagefile=True,
    include_stdin=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args,
                           start,
                           end,
                           process_text_on_page,
                           edit=True,
                           stdin=True)
Example #12
  text = unicode(page.text)
  parsed = blib.parse_text(text)
  for t in parsed.filter_templates():
    tn = tname(t)
    origt = unicode(t)
    param = None
    if tn in ["bg-noun", "bg-proper noun", "bg-verb", "bg-adj", "bg-adv",
        "bg-part", "bg-part form", "bg-verbal noun", "bg-verbal noun form",
        "bg-phrase"]:
      param = "1"
    elif tn == "head" and getparam(t, "1") == "bg":
      param = "head"
    if param:
      val = getparam(t, param)
      val = bglib.decompose(val)
      if GR in val:
        val = val.replace(GR, AC)
        t.add(param, val)
        notes.append("convert grave to acute in {{%s}}" % tn)
    if unicode(t) != origt:
      pagemsg("Replaced %s with %s" % (origt, unicode(t)))
  return unicode(parsed), notes

parser = blib.create_argparser("Change grave to acute in Bulgarian headwords",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page,
    default_cats=["Bulgarian lemmas", "Bulgarian non-lemma forms"], edit=True)
Example #13
            if not mm:
                pagemsg("WARNING: Saw unparsable part %s, not changing: %s" %
                        (parts[i], m.group(0)))
                return m.group(0)
            if TEMPSEP in parts[i]:
                pagemsg(
                    "WARNING: Internal error: Saw Unicode FFF0 in part %s, not changing: %s"
                    % (parts[i], m.group(0)))
                return m.group(0)
            parts[i] = "{{l|pl|%s}}" % mm.group(1)
        notes.append("replace multipart {{l|pl|...}} with separate links")
        return ", ".join(parts)

    text = re.sub(r"\{\{l\|pl\|([^{}]*[\[\]][^{}]*)\}\}", split_links, text)
    return text, notes


parser = blib.create_argparser(
    "Split {{l|pl|...}} links containing multiple entries",
    include_pagefile=True,
    include_stdin=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args,
                           start,
                           end,
                           process_text_on_page,
                           edit=True,
                           stdin=True)
        headt = None
      headt = t
    elif tn in ["be-decl-noun", "be-decl-noun-unc", "be-decl-noun-pl"]:
      if not headt:
        pagemsg("WARNING: Encountered declension template without headword: %s" % unicode(t))
      else:
        process_noun_headt(headt, t)
        headt = None
    elif tn == "rfinfl" and getparam(t, "1") == "be":
      if headt:
        process_noun_headt(headt)
        headt = None
    elif tn == "be-verb":
      process_verb_headt(t)
    elif tn == "be-adj":
      process_adj_headt(t)
  if headt:
    pagemsg("WARNING: Encountered headword template without declension: %s" % unicode(headt))
    process_noun_headt(headt)

  return unicode(parsed), notes

parser = blib.create_argparser(u"Clean up be-noun params",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page,
    #default_refs=["Template:be-adj", "Template:be-verb", "Template:be-noun"], edit=True)
    default_cats=["Belarusian proper nouns", "Belarusian nouns"], edit=True)
Example #15
      elif getparam(t, "p"):
        pagemsg("WARNING: Found unexpected p=%s: %s" % (getparam(t, "p"),
          unicode(t)))
      if not re.search("[ -]", pagetitle) and (getparam(t, "f") or
          getparam(t, "mp") or getparam(t, "fp") or getparam(t, "p")):
        pagemsg("Found remaining explicit feminine or plural in single-word base form: %s"
            % unicode(t))
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replacing %s with %s" % (origt, newt))

  newtext = unicode(parsed)
  if newtext != text:
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = newtext
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser("Remove extraneous params from {{fr-adj}}")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for cat in ["French adjectives"]:
  msg("Processing category: %s" % cat)
  for i, page in blib.cat_articles(cat, start, end):
    process_page(i, page, args.save, args.verbose)
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)

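    # Strip the [[Category:Russian nouns]] / [[Category:Russian proper nouns]] line, collapsing any surrounding blank lines.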
    cat = do_noun and "nouns" or "proper nouns"
    new_text = re.sub(r"\n\n\n*\[\[Category:Russian %s]]\n\n\n*" % cat, "\n\n",
                      text)
    new_text = re.sub(r"\[\[Category:Russian %s]]\n" % cat, "", new_text)
    return new_text, "Remove redundant [[:Category:Russian %s]]" % cat


parser = blib.create_argparser("Remove redundant 'Russian nouns' category",
                               include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)


def do_process_page_do_noun_true(page, index, parsed):
    return process_page(page, index, do_noun=True)


def do_process_page_do_noun_false(page, index, parsed):
    return process_page(page, index, do_noun=False)


# FIXME! Won't work properly with --pagefile.
blib.do_pagefile_cats_refs(
    args,
  if new_noun_table_template != orig_noun_table_template:
    pagemsg("Replacing noun table %s with %s" % (orig_noun_table_template,
      new_noun_table_template))

  new_headword_template = unicode(headword_template)
  if new_headword_template != orig_headword_template:
    pagemsg("Replacing headword %s with %s" % (orig_headword_template,
      new_headword_template))
    if unicode(headword_template.name) == "ru-noun+":
      ru_noun_changed = 1
    else:
      ru_proper_noun_changed = 1

  return unicode(parsed), ru_noun_table_cleaned, ru_noun_table_link_copied, ru_noun_changed, ru_proper_noun_changed

parser = blib.create_argparser("Copy the declension in ru-noun-table to ru-noun+, preserving any m=, f=, g=, etc. in the latter.")
parser.add_argument('--cats', default="nouns,proper nouns", help="Categories to do ('nouns', 'proper nouns' or 'nouns,proper nouns')")
parser.add_argument('--lemma-file', help="File containing lemmas to copy declension of. Will remove extraneous params from ru-noun-table and copy links to ru-noun-table regardless of this.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if args.lemma_file:
  lemmas = set([x.strip() for x in codecs.open(args.lemma_file, "r", "utf-8")])
else:
  lemmas = None

for cat in re.split(",", args.cats):
  if cat == "nouns":
    template = "Template:ru-noun+"
  elif cat == "proper nouns":
    template = "Template:ru-proper noun+"
Example #18
                def add_links(m):
                    prefix = m.group(1)
                    if re.search(u"[гкх]о$", prefix):
                        first = prefix[:-1] + u"ий"
                    else:
                        first = prefix[:-1] + u"ый"
                    return u"[[%s|%s]]-[[%s]]" % (rulib.remove_accents(first),
                                                  prefix, m.group(2))

                t.add("1", re.sub(u"^(.*?о)-([^-]*)$", add_links, head))
            notes.append("add links to two-part adjective")
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    return unicode(parsed), notes


parser = blib.create_argparser("Add links to two-part adjectives",
                               include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args,
                           start,
                           end,
                           process_page,
                           edit=True,
                           default_cats=["Russian adjectives"])
          rmparam(t, "1")
          notes.append("remove redundant 1= from {{%s}}" % name)
        else:
          pagemsg("Not removing non-redundant 1=%s" % head)
          check_bad_head(head, "1")

    newt = unicode(t)
    if origt != newt:
      pagemsg("Replacing %s with %s" % (origt, newt))

  newtext = unicode(parsed)
  if newtext != text:
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = newtext
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser("Remove redundant head= from French terms")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

#for cat in ["French lemmas", "French non-lemma forms"]:
for cat in ["French lemmas"]:
  msg("Processing category: %s" % cat)
  for i, page in blib.cat_articles(cat, start, end):
    process_page(i, page, args.save, args.verbose)
Example #20
        if tname(t) in [
                "ru-conj", "ru-conj-old", "User:Benwing2/ru-conj",
                "User:Benwing2/ru-conj-old"
        ]:
            t.add("1", getparam(t, "1").replace("-refl", ""))
        elif tname(t) == "temp" and getparam(t, "1") == "ru-conj":
            t.add("2", getparam(t, "2").replace("-refl", ""))
        newt = unicode(t)
        if origt != newt:
            notes.append("remove -refl from verb type")
            pagemsg("Replaced %s with %s" % (origt, newt))

    return parsed, notes


parser = blib.create_argparser(
    u"Fix up verb conjugations to not specify -refl")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_edit(pywikibot.Page(site, "User:Benwing2/test-ru-verb"),
             1,
             process_page,
             save=args.save,
             verbose=args.verbose)
blib.do_edit(pywikibot.Page(site, "User:Benwing2/test-ru-verb-2"),
             2,
             process_page,
             save=args.save,
             verbose=args.verbose)
for ref in ["Template:ru-conj-old"]:
    msg("Processing references to: %s" % ref)
Example #21
    tname = unicode(t.name)
    if tname in ru_head_templates:
      headname = tname
      found_this_head = True
    elif tname == "head" and getparam(t, "1") == "ru":
      headtype = getparam(t, "2")
      headname = "head|ru|%s" % headtype
      if headtype in ru_heads_to_warn_about:
        pagemsg("WARNING: Found %s" % headname)
      found_this_head = True
    if found_this_head:
      cat_head_count[headname] = cat_head_count.get(headname, 0) + 1
      overall_head_count[headname] = overall_head_count.get(headname, 0) + 1
      found_page_head = True
  if not found_page_head:
    pagemsg("WARNING: No head")
  if index % 100 == 0:
    output_heads_seen()

parser = blib.create_argparser(u"Find Russian terms without a proper headword line")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for category in ["Russian nouns", "Russian proper nouns", "Russian pronouns", "Russian determiners", "Russian adjectives", "Russian verbs", "Russian participles", "Russian adverbs", "Russian prepositions", "Russian conjunctions", "Russian interjections", "Russian idioms", "Russian phrases", "Russian abbreviations", "Russian acronyms", "Russian initialisms", "Russian noun forms", "Russian proper noun forms", "Russian pronoun forms", "Russian determiner forms", "Russian verb forms", "Russian adjective forms", "Russian participle forms"]:
  cat_head_count = {}
  msg("Processing category: %s" % category)
  for i, page in blib.cat_articles(category, start, end):
    process_page(i, page, args.save, args.verbose)
  output_heads_seen()
output_heads_seen(overall=True)
Example #22
                lang = getparam(t, "1")
                termparam = 2
            if lang != "la":
                #pagemsg("WARNING: Wrong language in template: %s" % unicode(t))
                continue
            term = getparam(t, str(termparam))
            alt = getparam(t, str(termparam + 1))
            gloss = getparam(t, str(termparam + 2))
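            # If the alt form is just the macron-marked version of the term, promote it to the link target.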
            if alt and lalib.remove_macrons(alt) == term:
                origt = unicode(t)
                t.add(str(termparam), alt)
                if gloss:
                    t.add(str(termparam + 1), "")
                else:
                    rmparam(t, str(termparam + 1))
                pagemsg("Replaced %s with %s" % (origt, unicode(t)))
                notes.append("move alt param to link param in %s" % tn)

    secbody = unicode(parsed)
    sections[j] = secbody + sectail
    return "".join(sections), notes


parser = blib.create_argparser(
    "Move alt param to term param in {{l}}, {{m}}, {{alternative form of}}, {{alt form}}",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
Example #23
    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    pagemsg("Processing")
    new_text = "#REDIRECT [[Module:ru-verb/documentation]]"
    comment = "redirect to [[Module:ru-verb/documentation]]"
    if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
    else:
        pagemsg("Would save with comment = %s" % comment)


parser = blib.create_argparser(u"Redirect ru-conj-* documentation pages")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

types = [
    "7a",
    "7b",
    "8a",
    "8b",
    "9a",
    "9b",
    "10a",
    "10c",
    "11a",
    "11b",
    "12a",
                        "Phonetic respelling %s (translit %s) in %s agrees with head translit %s, auto translit %s"
                        % (",".join(phon_respellings), ",".join(respelling_tr),
                           unicode(t), ",".join(head_template_tr),
                           ",".join(head_auto_tr)))

    if noun_head_template and head_template_tr and not saw_ndecl:
        pagemsg(
            "WARNING: Missing declension for noun needing phonetic respelling, headtr=%s, autotr=%s: %s"
            % (",".join(head_template_tr), ",".join(head_auto_tr),
               unicode(noun_head_template)))

    return unicode(parsed), notes


parser = blib.create_argparser(
    "Remove redundant translit from Hindi headwords and check translit against phonetic respelling",
    include_pagefile=True,
    include_stdin=True)
parser.add_argument('--direcfile',
                    help="File containing output from find_regex.py.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args,
                           start,
                           end,
                           process_text_on_page,
                           default_cats=["Hindi lemmas"],
                           edit=True,
                           stdin=True)
    if unicode(t.name) == "head" and getparam(t, "1") == "ru" and getparam(t, "2") == "verb form":
      found_head_verb_form = True

  if not found_head_verb_form or not found_inflection_of:
    # Find definition line
    foundrussian = False
    sections = re.split("(^==[^=]*==\n)", unicode(page.text), 0, re.M)

    for j in xrange(2, len(sections), 2):
      if sections[j-1] == "==Russian==\n":
        if foundrussian:
          pagemsg("WARNING: Found multiple Russian sections, skipping page")
          return
        foundrussian = True

        deflines = r"\n".join(re.findall(r"^(# .*)$", sections[j], re.M))

  if not found_head_verb_form:
    pagemsg("WARNING: No {{head|ru|verb form}}: %s" % deflines)
  if not found_inflection_of:
    pagemsg("WARNING: No 'inflection of': %s" % deflines)

parser = blib.create_argparser(u"Find badly formatted Russian verb forms")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for category in ["Russian verb forms"]:
  msg("Processing category: %s" % category)
  for i, page in blib.cat_articles(category, start, end):
    process_page(i, page, args.save, args.verbose)
Example #26
                if not re.search(r"\.\s*$", notesval):
                    notesval = re.sub(r"(\s*)$", r".\1", notesval)
                t.add("footnote",
                      notesval,
                      before="notes",
                      preserve_spacing=False)
                rmparam(t, "notes")
            blib.set_template_name(t, "be-adecl-manual")
            notes.append("convert {{be-adj-table}} to {{be-adecl-manual}}")
        if origt != unicode(t):
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))

    return unicode(parsed), notes


parser = blib.create_argparser(
    u"Convert old Belarusian adjective declension templates to new ones",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args,
                           start,
                           end,
                           process_page,
                           default_cats=[
                               "Belarusian adjectives", "Belarusian pronouns",
                               "Belarusian determiners"
                           ],
                           edit=True)
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)

  found_audio = False
  for t in parsed.filter_templates():
    if unicode(t.name) == "audio" and getparam(t, "lang") == "ru":
      found_audio = True
      break
  if found_audio:
    new_text = re.sub(r"\n*\[\[Category:Russian terms with audio links]]\n*", "\n\n", text)
    if new_text != text:
      comment = "Remove redundant [[:Category:Russian terms with audio links]]"
      if save:
        pagemsg("Saving with comment = %s" % comment)
        page.text = new_text
        page.save(comment=comment)
      else:
        pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser("Remove redundant audio-link categories")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.cat_articles("Russian terms with audio links", start, end):
  process_page(i, page, args.save, args.verbose)
        # Find definition line
        foundrussian = False
        sections = re.split("(^==[^=]*==\n)", unicode(page.text), 0, re.M)

        for j in xrange(2, len(sections), 2):
            if sections[j - 1] == "==Russian==\n":
                if foundrussian:
                    pagemsg(
                        "WARNING: Found multiple Russian sections, skipping page"
                    )
                    return
                foundrussian = True

                deflines = r"\n".join(
                    re.findall(r"^(# .*)$", sections[j], re.M))

    if not found_head_verb_form:
        pagemsg("WARNING: No {{head|ru|verb form}}: %s" % deflines)
    if not found_inflection_of:
        pagemsg("WARNING: No 'inflection of': %s" % deflines)


parser = blib.create_argparser(u"Find badly formatted Russian verb forms")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for category in ["Russian verb forms"]:
    msg("Processing category: %s" % category)
    for i, page in blib.cat_articles(category, start, end):
        process_page(i, page, args.save, args.verbose)
Example #29
        notes.append("replaced {{head|fr|%s}} with {{%s}}%s" % (headtype,
          unicode(t.name), " (NEEDS REVIEW)" if fixed_plural_warning else ""))

  newtext = unicode(parsed)
  if newtext != text:
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = newtext
      blib.try_repeatedly(lambda: page.save(comment=comment), pagemsg,
                    "save page")
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser("Convert head|fr|* to fr-*")
parser.add_argument("--fix-missing-plurals", action="store_true", help="Fix cases with missing plurals by just assuming the default plural.")
parser.add_argument("--lemma-file",help="File containing lemmas to do.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

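# If --lemma-file is given, process only the listed lemmas; otherwise iterate over the French categories below.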
if args.lemma_file:
  lines = [x.strip() for x in codecs.open(args.lemma_file, "r", "utf-8")]
  for i, pagename in blib.iter_items(lines, start, end):
    process_page(i, pywikibot.Page(site, pagename), args.save, args.verbose, args.fix_missing_plurals)
else:
  for cat in ["French nouns", "French proper nouns", "French pronouns", "French determiners", "French adjectives", "French verbs", "French participles", "French adverbs", "French prepositions", "French conjunctions", "French interjections", "French idioms", "French phrases", "French abbreviations", "French acronyms", "French initialisms", "French noun forms", "French proper noun forms", "French pronoun forms", "French determiner forms", "French verb forms", "French adjective forms", "French participle forms", "French proverbs", "French prefixes", "French suffixes", "French diacritical marks", "French punctuation marks"]:
  #for cat in ["French adjective forms", "French participle forms", "French proverbs", "French prefixes", "French suffixes", "French diacritical marks", "French punctuation marks"]:
    msg("Processing category: %s" % cat)
    for i, page in blib.cat_articles(cat, start, end):
      process_page(i, page, args.save, args.verbose, args.fix_missing_plurals)
    rmparam(headword_template, "g2")
    rmparam(headword_template, "g3")
    rmparam(headword_template, "g4")
    rmparam(headword_template, "g5")
    for gnum, g in enumerate(genders):
        param = "g" if gnum == 0 else "g" + str(gnum + 1)
        headword_template.add(param, g)
    pagemsg("Replacing %s with %s" %
            (orig_template, unicode(headword_template)))

    return unicode(
        parsed
    ), "Fix headword gender, substituting new value %s" % ",".join(genders)


parser = blib.create_argparser(
    "Fix gender errors introduced by fix_ru_noun.py")
parser.add_argument('--direcfile',
                    help="File containing pages and warnings to process",
                    required=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# * Page 3574 [[коала]]: WARNING: Gender mismatch, existing=m-an,f-an, new=f-an
lines = [x.strip() for x in codecs.open(args.direcfile, "r", "utf-8")]
for i, line in blib.iter_items(lines, start, end):
    m = re.search(
        "^\* Page [0-9]+ \[\[(.*?)\]\]: WARNING: Gender mismatch, existing=(.*?), new=.*?$",
        line)
    if not m:
        msg("WARNING: Can't process line: %s" % line)
    else:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

import rulib as ru

parser = blib.create_argparser(u"Delete ru-conj-* templates and documentation pages")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

msg("WARNING: Script no longer applies and would need fixing up")

types = ["1a", "2a", "2b", "3oa", "3a", "3b", "3c", "4a", "4b", "4c", "5a",
    "5b", "5c", "6a", "6b", "6c",
    "7a", "7b", "8a", "8b", "9a", "9b", "10a", "10c", "11a", "11b",
    "12a", "12b", "13b", "14a", "14b", "14c", "15a", "16a", "16b",
    u"irreg-бежать", u"irreg-спать", u"irreg-хотеть", u"irreg-дать",
    u"irreg-есть", u"irreg-сыпать", u"irreg-лгать", u"irreg-мочь",
    u"irreg-слать", u"irreg-идти", u"irreg-ехать", u"irreg-минуть",
    u"irreg-живописать-миновать", u"irreg-лечь", u"irreg-зиждиться",
    u"irreg-клясть", u"irreg-слыхать-видать", u"irreg-стелить-стлать",
    u"irreg-быть", u"irreg-ссать-сцать", u"irreg-чтить", u"irreg-шибить",
    u"irreg-плескать", u"irreg-реветь", u"irreg-внимать", u"irreg-внять",
    u"irreg-обязывать"]
for i, ty in blib.iter_items(types, start, end):
  template_page = pywikibot.Page(site, "Template:ru-conj-%s" % ty)
  if template_page.exists():
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, lemma, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, lemma, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, lemma, pagemsg, verbose)

    pagemsg("Processing")

    for formind, form in blib.iter_items(forms):
        delete_form(index, lemma, formind, form, lang, save, verbose, diff)


parser = blib.create_argparser(u"Delete bad forms for inflected languages")
parser.add_argument('--formfile',
                    help="File containing lemmas and forms to delete.",
                    required=True)
parser.add_argument('--lang',
                    help="Language ('es' or 'it').",
                    choices=["es", "it"],
                    required=True)
parser.add_argument('--output-pages-to-delete',
                    help="File to write pages to delete.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

pages_to_delete = []
lines = [x.strip() for x in codecs.open(args.formfile, "r", "utf-8")]
for index, line in blib.iter_items(lines, start, end):
            t.add("4", number)
            newt = unicode(t)
            if origt != newt:
              pagemsg("Replaced %s with %s" % (origt, newt))
              notes.append("converted '%s|%s' to '%s|%s'" %
                  (number, case, case, number))
      sections[j] = unicode(parsed)

  new_text = "".join(sections)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(blib.group_notes(notes))
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser(u"Canonicalize 'inflection of' for noun forms")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for category in ["Russian noun forms"]:
  msg("Processing category: %s" % category)
  for i, page in blib.cat_articles(category, start, end):
    process_page(i, page, args.save, args.verbose)
Example #34
def process_page(index, page, save, verbose):
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

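    # Only terms ending in -ник or -ок (as a word or final component) are candidates for missing accent b.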
    if not re.search(ur"(ник|ок)([ -]|$)", pagetitle):
        return

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        tname = unicode(t.name)
        if tname == "ru-noun-table":
            ut = unicode(t)
            if re.search(ur"ни́к(\||$)", ut) and "|b" not in ut:
                pagemsg("WARNING: Likely missing accent b: %s" % ut)
            if re.search(ur"о́к(\||$)", ut) and "*" in ut and "|b" not in ut:
                pagemsg("WARNING: Likely missing accent b: %s" % ut)


parser = blib.create_argparser(u"Find likely missing accent b")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for category in ["Russian nouns"]:
    msg("Processing category: %s" % category)
    for i, page in blib.cat_articles(category, start, end):
        process_page(i, page, args.save, args.verbose)
Example #35
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

parser = blib.create_argparser(u"List pages, lemmas and/or non-lemmas")
parser.add_argument('--cats', default="Russian lemmas", help="Categories to do (can be comma-separated list)")
parser.add_argument('--refs', help="References to do (can be comma-separated list)")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if args.refs:
  for ref in re.split(",", args.refs):
    msg("Processing references to: %s" % ref)
    for i, page in blib.references(ref, start, end):
      msg("Page %s %s: Processing" % (i, unicode(page.title())))
else:
  for cat in re.split(",", args.cats):
    msg("Processing category: %s" % cat)
    for i, page in blib.cat_articles(cat, start, end):
      msg("Page %s %s: Processing" % (i, unicode(page.title())))
Example #36
                        t.add("2", "irreg/c'")
                        notes.append(
                            "make past stress /c' explicit in irreg verb")
                    else:
                        t.add("2", "irreg/c")
                        notes.append(
                            "make past stress /c explicit in irreg verb")
                elif param2 == "irreg/a":
                    t.add("2", "irreg")
                    notes.append("make past stress /a default in irreg verb")
                elif not param2.startswith("irreg/"):
                    errpagemsg("WARNING: Unable to parse param2 %s" % param2)

        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    return parsed, notes


parser = blib.create_argparser(
    u"Fix up class-8 and irregular arguments to have class a as default past stress"
)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.cat_articles("Russian class 8b verbs", start, end):
    blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
for i, page in blib.cat_articles("Russian irregular verbs", start, end):
    blib.do_edit(page, i, process_page, save=args.save, verbose=args.verbose)
Example #37
    return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

  pagemsg("Processing")

  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    if unicode(t.name) in ["ru-conj", "ru-conj-old"] and getparam(t, "1").startswith("pf"):
      if unicode(t.name) == "ru-conj":
        tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms", unicode(t))
      else:
        tempcall = re.sub(r"\{\{ru-conj-old", "{{ru-generate-verb-forms|old=y", unicode(t))
      result = expand_text(tempcall)
      if not result:
        pagemsg("WARNING: Error generating forms, skipping")
        continue
      args = rulib.split_generate_args(result)
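      # Look for an explicitly specified past passive participle (past_pasv_part/ppp and numbered variants).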
      for base in ["past_pasv_part", "ppp"]:
        for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
          val = getparam(t, base + i)
          if val and val != "-":
            val = re.sub("//.*", "", val)
            pagemsg("Found perfective past passive participle: %s" % val)

parser = blib.create_argparser(u"Find Russian perfective verbs with explicit past passive participles")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for category in ["Russian verbs"]:
  for i, page in blib.cat_articles(category, start, end):
    process_page(i, page, args.save, args.verbose)
Example #38
                    forms_seen.add(form_no_macrons)
                    slots_and_forms_to_process.append((slot, form))
            for formindex, (slot, form) in blib.iter_items(
                    sorted(slots_and_forms_to_process,
                           key=lambda x: lalib.remove_macrons(x[1]))):

                def handler(page, formindex, parsed):
                    return process_form(page, formindex, slot, form, pos,
                                        pagemsg)

                blib.do_edit(pywikibot.Page(site, lalib.remove_macrons(form)),
                             "%s.%s" % (index, formindex),
                             handler,
                             save=args.save,
                             verbose=args.verbose,
                             diff=args.diff)


parser = blib.create_argparser(
    u"Correct headers/headwords of non-lemma forms with the wrong part of speech",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
    args,
    start,
    end,
    process_page,
    default_cats=["Latin participles", "Latin proper nouns"])
  rmparam(headword_template, "g4")
  rmparam(headword_template, "g5")
  for gnum, g in enumerate(genders):
    param = "g" if gnum == 0 else "g" + str(gnum+1)
    headword_template.add(param, g)
  pagemsg("Replacing %s with %s" % (orig_template, unicode(headword_template)))

  comment = "Fix headword gender, substituting new value %s" % ",".join(genders)
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = unicode(parsed)
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser("Fix gender errors introduced by fix_ru_noun.py")
parser.add_argument('--pagefile', help="File containing pages and warnings to process")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# * Page 3574 [[коала]]: WARNING: Gender mismatch, existing=m-an,f-an, new=f-an
lines = [x.strip() for x in codecs.open(args.pagefile, "r", "utf-8")]
for i, line in blib.iter_items(lines, start, end):
  m = re.search("^\* Page [0-9]+ \[\[(.*?)\]\]: WARNING: Gender mismatch, existing=(.*?), new=.*?$", line)
  if not m:
    msg("WARNING: Can't process line: %s" % line)
  else:
    page, genders = m.groups()
    msg("Page %s %s: Processing: %s" % (i, page, line))
    process_page(i, pywikibot.Page(site, page), args.save, args.verbose,
        re.split(",", genders))
Example #40
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    parsed = blib.parse(page)

    found_headword_template = False
    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-adj"]:
            found_headword_template = True
    if not found_headword_template:
        notes = []
        for t in parsed.filter_templates():
            if unicode(t.name) in [
                    "ru-noun", "ru-noun+", "ru-proper noun", "ru-proper noun+"
            ]:
                notes.append("found noun header (%s)" % unicode(t.name))
            if unicode(t.name) == "head":
                notes.append("found head header (%s)" % getparam(t, "2"))
        pagemsg("Missing adj headword template%s" %
                (notes and "; " + ",".join(notes)))


parser = blib.create_argparser("Find missing adjective headwords")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for index, page in blib.references("Template:ru-decl-adj", start, end):
    process_page(index, page)
Example #41
          msg("Would remove past overrides and add arg5=b")
        else:
          msg("WARNING: Remaining past overrides: past_m=%s, past_f=%s, past_n=%s, past_pl=%s, expected_past_m=%s, expected_past_f=%s, expected_past_n=%s, expected_past_pl=%s" %
              (past_m, past_f, past_n, past_pl, expected_past_m, expected_past_f, expected_past_n, expected_past_pl))
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser(u"Convert class-7 past overrides to past stress pattern")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for category in ["Russian class 7 verbs"]:
  msg("Processing category: %s" % category)
  for i, page in blib.cat_articles(category, start, end):
    process_page(i, page, args.save, args.verbose)
Example #42
                        "Not adding alt=%s because it's the same as the term" %
                        t_alt)
                else:
                    t.add("alt", t_alt)
            if t_tr:
                t.add("tr", t_tr)
            if t_sort:
                t.add("sort", t_sort)
            if t_sc:
                t.add("sc", t_sc)
            notes.append("convert {{%s}} to {{auto cat}}" % tn)

        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))

    return unicode(parsed), notes


parser = blib.create_argparser("Convert affix cat usages to {{auto cat}}",
                               include_pagefile=True,
                               include_stdin=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args,
                           start,
                           end,
                           process_text_on_page,
                           edit=True,
                           stdin=True)
Example #43
  if origtext != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (origtext, text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)
  elif warn_on_no_change:
    pagemsg("WARNING: No changes")

parser = blib.create_argparser(u"Fix indentation of Pronunciation, Declension, Conjugation, Alternative forms sections")
parser.add_argument("--pagefile",
    help="""List of pages to process.""")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if args.pagefile:
  lines = [x.strip() for x in codecs.open(args.pagefile, "r", "utf-8")]
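  # Each line of --pagefile is expected to look like "Page N TITLE: WARNING: ..."; extract the title and reprocess it.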
  for i, line in blib.iter_items(lines, start, end):
    m = re.search("^Page [0-9]+ (.*?): WARNING: .*?$", line)
    if not m:
      msg("WARNING: Can't process line: %s" % line)
    else:
      page = m.group(1)
      process_page(i, pywikibot.Page(site, page), args.save, args.verbose,
          warn_on_no_change=True)
Example #44
  for t in parsed.filter_templates():
    origt = unicode(t)
    tn = tname(t)
    if tn == "la-ndecl":
      num_ndecl_templates += 1
      lemmaspec = getparam(t, "1")
      m = re.search("^(.*)<(.*)>$", lemmaspec)
      if not m:
        pagemsg("WARNING: Unable to parse lemma+spec %s, skipping: %s" % (
          lemmaspec, origt))
        continue
      lemma, spec = m.groups()
      if ".-ium" not in spec:
        spec += ".-ium"
        t.add("1", "%s<%s>" % (lemma, spec))
        pagemsg("Replaced %s with %s" % (origt, unicode(t)))
        notes.append("add .-ium to declension of Latin chemical element")
  if num_ndecl_templates > 1:
    pagemsg("WARNING: Saw multiple {{la-ndecl}} templates, some may not be elements")
    return None, None

  return unicode(parsed), notes

parser = blib.create_argparser("Add missing .-ium to Latin elements",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page,
    default_cats=["la:Chemical elements"], edit=True)
Example #45
          notes.append("moving past_m %s to arg 3" % past_m)
        else:
          pagemsg("Stem %s and past_m %s are different, putting past_m in param 5" % (
            stem, past_m))
          t.add("5", past_m)
          notes.append("moving past_m %s to arg 5" % past_m)
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser(u"Fix up class-8 arguments")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.cat_articles("Russian class 8 verbs", start, end):
  process_page(i, page, args.save, args.verbose)
Example #46
                        notes.append(
                            "note transitive unpaired imperfective verb as lacking past passive participle"
                        )
                        pagemsg("Note no PPP, replace %s with %s" %
                                (origt, unicode(t)))
                    elif direc == "paired":
                        pagemsg("Verb actually is paired")
                    elif direc == "fixed":
                        pagemsg("WARNING: Unfixed verb marked as fixed")
                    elif direc == "intrans":
                        pagemsg("WARNING: Transitive verb marked as intrans")

    return unicode(parsed), notes


parser = blib.create_argparser(
    u"Find verbs with missing past passive participles")
parser.add_argument('--fix-pagefile', help="File containing pages to fix.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if args.fix_pagefile:
    fixdireclines = [
        x.strip() for x in codecs.open(args.fix_pagefile, "r", "utf-8")
    ]
    fixdirecs = {}
    fixpages = []
    for line in fixdireclines:
        verb, direc = re.split(" ", line)
        fixdirecs[verb] = direc
        fixpages.append(verb)
Example #47
      if verbtype == "pf-impers-refl":
        t.add("1", "pf-refl-impers")
        notes.append("pf-impers-refl -> pf-refl-impers")
      if verbtype == "impf-impers-refl":
        t.add("1", "impf-refl-impers")
        notes.append("impf-impers-refl -> impf-refl-impers")
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser(u"Change verb type *-impers-refl to *-refl-impers")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, page in blib.cat_articles("Russian verbs", start, end):
  process_page(i, page, args.save, args.verbose)
Example #48
            elif re.search(r"^'*optional'*$", para2):
                opt = True
                para2 = None
            origt = unicode(t)
            t.add("1", para1)
            if para2:
                t.add("2", "")
                t.add("3", para2)
            else:
                rmparam(t, "2")
            if req:
                t.add("req", "1")
            if opt:
                t.add("opt", "1")
            blib.set_template_name(t, "para")
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
            if para2:
                pagemsg("Set additional info param 3=%s in %s" %
                        (para2, unicode(t)))
            notes.append(u"convert {{docparam}} to {{para}}")

    return unicode(parsed), notes


parser = blib.create_argparser("Deprecate {{docparam}} in favor of {{para}}",
                               include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
Example #49
    return

  if text != newtext:
    if verbose:
      pagemsg("Replacing <<%s>> with <<%s>>" % (text, newtext))

    comment = "Replace raw links with templated links: %s" % ",".join(subbed_links)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = newtext
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

if __name__ == "__main__":
  parser = blib.create_argparser("Replace raw links with templated links")
  parser.add_argument('--lang', help="Language code for language to do")
  args = parser.parse_args()
  start, end = blib.parse_start_end(args.start, args.end)

  if not args.lang:
    raise ValueError("Language code must be specified")
  if args.lang not in languages:
    raise ValueError("Unrecognized language code: %s" % args.lang)
  thislangcode = args.lang
  thislangname, this_remove_accents, this_charset, this_ignore_translit = (
      languages[thislangcode])

  for category in ["%s lemmas" % thislangname, "%s non-lemma forms" % thislangname]:
    msg("Processing category: %s" % category)
    for i, page in blib.cat_articles(category, start, end):
Example #50
                         0, re.M)
    if new_secbody != secbody:
        notes.append(
            "remove bad Chinese links (see [[Wiktionary:Grease pit/2019/September#Requesting bot help]])"
        )
        secbody = new_secbody
    subsections = re.split("(^==+[^=\n]+==+\n)", secbody, 0, re.M)

    subsections_to_delete = []
    for k in xrange(1, len(subsections), 2):
        if (subsections[k] in ["===References===\n", "====References====\n"]
                and not subsections[k + 1].strip()):
            subsections_to_delete.append(k)
    if subsections_to_delete:
        for k in reversed(subsections_to_delete):
            del subsections[k:k + 2]
        notes.append("remove empty References section")

    secbody = "".join(subsections)
    sections[j] = secbody.rstrip("\n") + secbody_finalnl + sectail
    return "".join(sections), notes


parser = blib.create_argparser(
    "Remove bad Chinese references and resulting empty References section",
    include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page, edit=True)
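
The empty-References removal above leans on a re.split detail: splitting on a header pattern wrapped in a capture group returns an alternating list of body text and headers, so headers sit at odd indices and each header's body follows at k + 1. A tiny standalone demonstration of that pattern on toy wikitext, not taken from any real page:

import re

secbody = """text before any header
===Etymology===
from somewhere
===References===
===Further reading===
* a link
"""

subsections = re.split("(^==+[^=\n]+==+\n)", secbody, 0, re.M)
# subsections[0] is the leading text; headers are at odd indices,
# their bodies at the following even index.
to_delete = [k for k in range(1, len(subsections), 2)
             if subsections[k] == "===References===\n" and not subsections[k + 1].strip()]
for k in reversed(to_delete):
  del subsections[k:k + 2]

print("".join(subsections))  # the empty References section is gone
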
Example #51
0
        process_arg_set(arg_set)
        arg_set = []
      else:
        arg_set.append(val)

  for t in parsed.filter_templates():
    tname = unicode(t.name)
    if tname == "ru-decl-noun-see":
      pagemsg("WARNING: Skipping ru-decl-noun-see, can't handle yet: %s" % unicode(t))
    elif tname in ["ru-noun+", "ru-proper noun+"]:
      pagemsg("Found %s" % unicode(t))
      process_new_style_headword(t)
    elif tname in ["ru-noun", "ru-proper noun"]:
      pagemsg("WARNING: Skipping ru-noun or ru-proper noun, can't handle yet: %s" % unicode(t))

parser = blib.create_argparser(u"Find red links in multiword lemmas")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

msg("Reading Russian lemmas")
for i, page in blib.cat_articles("Russian lemmas", start, end):
  lemmas.add(unicode(page.title()))

for pos in ["nouns", "proper nouns"]:
  tracking_page = "Template:tracking/ru-headword/space-in-headword/" + pos
  msg("PROCESSING REFERENCES TO: %s" % tracking_page)
  for index, page in blib.references(tracking_page, start, end):
    process_page(index, page, args.verbose)

for lemma, nonexistent_msg in sorted(nonexistent_lemmas.items(), key=lambda pair:(-lemma_count[pair[0]], pair[0])):
  msg("* [[%s]] (%s occurrence%s): %s (refs: %s)" % (lemma, lemma_count[lemma],
Example #52
0
                    pagemsg("Existing text for form %s: [[%s]]" % (
                      formpagename, text))
                    if save:
                      formpage.delete(comment)
                    else:
                      pagemsg("Would delete page %s with comment=%s" %
                          (formpagename, comment))

      notes.append("fix 3olda -> %s" % direc)
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  return unicode(parsed), notes

parser = blib.create_argparser("Fix up class 3a")
parser.add_argument('--direcfile', help="File containing pages to fix and directives.")
parser.add_argument('--delete-bad', action="store_true", help="Delete bad forms.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

pagedirecs = []
lines = [x.strip() for x in codecs.open(args.direcfile, "r", "utf-8")]
for i, line in blib.iter_items(lines, start, end):
  if line.startswith("#"):
    msg("Skipping comment: %s" % line)
  else:
    page, direc = re.split(" ", line)
    def do_process_page(page, index, parsed):
      return process_page(index, page, direc, args.delete_bad, args.verbose)
    blib.do_edit(pywikibot.Page(site, page), i, do_process_page, save=args.save,
Example #53
0
    #    but it's the default in ru-noun-table unless the lemma is plural.
    #    So remove n=both, generate the arguments, and see if the actual
    #    value of args.n is b (for "both"); if not, set n=both.
    else:
      assert headword_n == "b"
      rmparam(see_template, "n")
      see_generate_template = re.sub(r"^\{\{ru-noun-table", "{{ru-generate-noun-args",
          unicode(see_template))
      see_generate_result = expand_text(see_generate_template)
      if not see_generate_result:
        pagemsg("WARNING: Error generating ru-noun-table args")
        return None
      see_args = ru.split_generate_args(see_generate_result)
      if see_args["n"] != "b":
        see_template.add("n", "both")

  comment = "Replace ru-decl-noun-see with ru-noun-table, taken from headword template (%s)" % unicode(headword_template.name)
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = unicode(parsed)
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser("Convert ru-decl-noun-see into ru-noun-table decl template, taken from headword ru-(proper )noun+ template")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for index, page in blib.references("Template:ru-decl-noun-see", start, end):
  process_page(index, page, args.save, args.verbose)
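
The n=both check in the example above expands {{ru-generate-noun-args}} and passes the result to ru.split_generate_args. Judging only from how the return value is used (see_args["n"]), that helper presumably turns key=value output into a dict. A hedged re-implementation of that parsing step, assuming pipe-separated key=value pairs; this is a guess at the format, not the real ru module:

# Hypothetical stand-in for ru.split_generate_args (assumed output format).
def split_generate_args(result):
  args = {}
  for pair in result.split("|"):
    if "=" in pair:
      key, value = pair.split("=", 1)
      args[key] = value
  return args

see_args = split_generate_args("n=b|g=m|anim=in")
print(see_args["n"])  # -> b
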
Example #54
0
        if g not in genders:
          pagemsg("WARNING: Saw decl gender %s that disagrees with headword gender(s) %s: headt=%s, declt=%s" % (
            g, ",".join(genders), unicode(headt), unicode(t)))
          continue

        blib.set_template_name(t, "sa-decl-noun-%s" % g)
        rmparam(t, "n")
        rmparam(t, "4")
        rmparam(t, "3")
        rmparam(t, "2")
        t.add("1", tr)
        notes.append("convert {{%s}} to {{sa-decl-noun-%s}}" % (tn, g))
      else:
        pagemsg("WARNING: Saw unrecognized decl template: %s" % unicode(t))

    if origt != unicode(t):
      pagemsg("Replaced %s with %s" % (origt, unicode(t)))

  if headt:
    pagemsg("WARNING: Saw {{sa-noun}} without {{sa-decl-noun-*}}: %s" % unicode(headt))

  return unicode(parsed), notes

parser = blib.create_argparser("Convert old {{sa-decl-noun-*}} templates to new ones",
  include_pagefile=True, include_stdin=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_text_on_page, edit=True, stdin=True,
  default_cats=["Sanskrit nouns"])
Example #55
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

parser = blib.create_argparser(u"Purge (null-save) pages in category or references")
parser.add_argument('--cat', help="Category to purge")
parser.add_argument('--ref', help="References to purge")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

pages = []
if args.cat:
  pages_to_list = blib.cat_articles(args.cat, start, end)
else:
  pages_to_list = blib.references(args.ref, start, end)
for i, page in pages_to_list:
  # msg("Page %s %s: Null-saving" % (i, unicode(page.title())))
  page.save(comment="null save")
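
A null save, re-saving the page text unchanged, forces the wiki to reparse the page and refresh link tables. pywikibot also offers a direct Page.purge(), which clears the cached rendering without making an edit; whether that is sufficient depends on what needs refreshing. A sketch of the alternative (the page title is just an example):

import pywikibot

site = pywikibot.Site("en", "wiktionary")
page = pywikibot.Page(site, u"example")
page.purge()  # clears the parser cache; unlike a null save, this is not an edit
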
Example #56
0
                errandpagemsg("WARNING: Already found %s section" % lang)
                return
            if foundlang > lang:
                insert_before = j - 1
                break
        if insert_before == 0:
            # Add to the end
            newtext = curtext.rstrip("\n") + "\n\n----\n\n" + contents
            return newtext, comment
        sections[insert_before:insert_before] = contents.rstrip(
            "\n") + "\n\n----\n\n"
        return "".join(sections), comment


if __name__ == "__main__":
    parser = blib.create_argparser("Push new entries from generate_entries.py")
    parser.add_argument('--direcfile', help="File containing entries.")
    parser.add_argument('--comment', help="Comment to use.", required=True)
    parser.add_argument('--lang', help="Language of entries.", required=True)
    args = parser.parse_args()
    start, end = blib.parse_start_end(args.start, args.end)

    lines = codecs.open(args.direcfile, "r", "utf-8")

    index_pagename_and_text = blib.yield_text_from_find_regex(
        lines, args.verbose)
    for _, (index, pagename,
            text) in blib.iter_items(index_pagename_and_text,
                                     start,
                                     end,
                                     get_name=lambda x: x[1],
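
One subtlety in the section-insertion code above: assigning a bare string to a list slice (sections[insert_before:insert_before] = contents ...) splices in the string's individual characters rather than a single element. Because the list is immediately recombined with "".join(sections), the final text comes out the same either way, though the conventional spelling wraps the string in a list. A quick demonstration:

sections = ["==English==\n...body...\n", "==French==\n...body...\n"]
contents = "==Finnish==\n...new body...\n\n----\n\n"

a = list(sections)
a[1:1] = contents        # splices in each character of the string
b = list(sections)
b[1:1] = [contents]      # splices in the string as one element
assert "".join(a) == "".join(b)  # joined output is identical
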
Example #57
0
        oldt = unicode(t)
        del t.params[:]
        t.name = "fr-conj-auto"
        if refl:
          t.add("refl", "yes")
        if aux:
          t.add("aux", aux)
        newt = unicode(t)
        pagemsg("Replacing %s with %s" % (oldt, newt))
        notes.append("replaced {{%s}} with %s" % (name, newt))

  newtext = unicode(parsed)
  if newtext != text:
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = newtext
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser("Convert old fr-conj-* to fr-conj-auto")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for cat in ["French verbs"]:
  msg("Processing category: %s" % cat)
  for i, page in blib.cat_articles(cat, start, end):
    process_page(i, page, args.save, args.verbose)
Example #58
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pywikibot, re, sys, codecs, argparse

import blib
from blib import getparam, rmparam, msg, site

parser = blib.create_argparser(u"Find verbs with impersonal conjugations")
parser.add_argument('--verbfile', help="File listing verbs to check.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for i, line in blib.iter_items(codecs.open(args.verbfile, "r", "utf-8"), start, end):
  page = pywikibot.Page(site, line.strip())
  if "-impers|" in page.text:
    msg("Page %s %s: Found impersonal conjugation" % (i, unicode(page.title())))
  else:
    msg("Page %s %s: No impersonal conjugation" % (i, unicode(page.title())))
Example #59
0
            (",".join(manual_ppps), ",".join(auto_ppps), unicode(t)))
      else: # no break in for loop
        for m in notsamemsgs:
          pagemsg(m)

    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

parser = blib.create_argparser(u"Infer the past passive participle variant from the actual PPP")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for category in ["Russian verbs"]:
  for i, page in blib.cat_articles(category, start, end):
    process_page(i, page, args.save, args.verbose)
Example #60
0
            found_headword_template = True
            if getparam(t, "3") == "-":
                found_invariant_headword_template = True
            else:
                headword_templates.append(unicode(t))
        if unicode(t.name) in ["ru-noun-table", "ru-decl-noun-see"]:
            found_decl_template = True
    if found_headword_template and not found_invariant_headword_template:
        if found_decl_template:
            pagemsg("Found old-style headword template(s) %s with decl" %
                    ", ".join(headword_templates))
        else:
            pagemsg("Found old-style headword template(s) %s without decl" %
                    ", ".join(headword_templates))


parser = blib.create_argparser("Find Russian nouns without declension")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

#for pos in ["nouns", "proper nouns"]:
#  Do multi-word nouns
#  tracking_page = "Template:tracking/ru-headword/space-in-headword/" + pos
#  msg("Processing references to %s" % tracking_page)
#  for index, page in blib.references(tracking_page, start, end):
#    process_page(index, page)
#  Do all nouns with {{ru-noun}} or {{ru-proper noun}}
for template in ["ru-noun", "ru-proper noun"]:
    for index, page in blib.references("Template:%s" % template, start, end):
        process_page(index, page)