subtypes = [x for x in subtypes if x != "N" and x != "I"]
          if "pure" in subtypes:
            subtypes = [x for x in subtypes if x != "pure"]
          else:
            subtypes = subtypes + ["-pure"]
        elif "N" in subtypes:
          newlemma = stem2 + "a"
          subtypes = [x for x in subtypes if x != "N"]
        else:
          newlemma = stem2 + u"ēs"
        subtypes = [x for x in subtypes if x != "-I"]
        newspec = ".".join([decl] + subtypes)
        t.add("1", "%s<%s>" % (newlemma, newspec))
        pagemsg("Replaced %s with %s" % (origt, unicode(t)))
        notes.append("convert 3rd-declension plural term to have plural lemma in {{la-ndecl}}")
        break
      if not compare_new_and_old_templates(origt, unicode(t), pagetitle, pagemsg, errandpagemsg):
        bad_compare = True

  if bad_compare:
    return None, None
  return unicode(parsed), notes

# Script driver: build the argument parser, turn the parsed start/end options
# into bounds, and hand process_page to blib's page iterator with edit=True.
# default_refs presumably selects pages that reference these templates when
# no explicit page source is given -- confirm against blib.
parser = blib.create_argparser(
  "Fix Latin 3rd-decl plural nouns to specify plural lemma, and check new against old {{la-ndecl}} code",
  include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
  args,
  start,
  end,
  process_page,
  edit=True,
  default_refs=["Template:la-ndecl", "Template:la-adecl"])
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)

    found_audio = False
    for t in parsed.filter_templates():
        if unicode(t.name) == "audio" and getparam(t, "lang") == "ru":
            found_audio = True
            break
    if found_audio:
        new_text = re.sub(
            r"\n*\[\[Category:Russian terms with audio links]]\n*", "\n\n",
            text)
        if new_text != text:
            return new_text, "Remove redundant [[:Category:Russian terms with audio links]]"


# Script driver: parse the command line, compute the start/end bounds and run
# process_page over the pages with edit=True.
# default_cats presumably names the category used to pick pages by default --
# confirm against blib.
parser = blib.create_argparser(
    "Remove redundant audio-link categories", include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
    args, start, end, process_page,
    edit=True,
    default_cats=["Russian terms with audio links"])
Beispiel #3
0
    sections = re.split("(^==[^=]*==\n)", text, 0, re.M)

    for j in xrange(2, len(sections), 2):
        m = re.search("^==(.*)==\n$", sections[j - 1])
        assert m
        langname = m.group(1)
        if langname not in blib.languages_byCanonicalName:
            pagemsg("WARNING: Can't find language %s" % langname)
            continue
        langcode = blib.languages_byCanonicalName[langname]["code"]
        sections[j] = re.sub(r"\bLANGCODE\b", langcode, sections[j])
        notes.append("replace LANGCODE with %s" % langcode)

    newtext = "".join(sections)
    return newtext, notes


# Script driver: the parser is created with both include_pagefile and
# include_stdin turned on, and process_text_on_page is run with edit=True and
# stdin=True.
parser = blib.create_argparser("Replace LANGCODE with appropriate language code",
                               include_pagefile=True,
                               include_stdin=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
    args, start, end, process_text_on_page,
    edit=True,
    stdin=True)
Beispiel #4
0
                frobbed_pronuns = []
                if specified_pronuns:
                    notes.append(
                        "remove explicitly specified pronun in {{it-IPA}} because same as page title"
                    )
            blib.set_param_chain(t, frobbed_pronuns, "1", "")
            if t.has("voiced"):
                rmparam(t, "voiced")
                notes.append("remove voiced= in {{it-IPA}}")

        if origt != unicode(t):
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))

    return unicode(parsed), notes


# Script driver: build the parser (page-file and stdin options enabled),
# resolve the start/end bounds, then run process_text_on_page with edit=True
# and stdin=True.
# default_refs presumably limits the default page set to pages referencing
# Template:it-IPA -- confirm against blib.
parser = blib.create_argparser("Add missing stress and z resolution to {{it-IPA}}",
                               include_pagefile=True,
                               include_stdin=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
    args, start, end, process_text_on_page,
    edit=True,
    stdin=True,
    default_refs=["Template:it-IPA"])
                        origt = unicode(t)
                        t.add("2", "+", before="a")
                        notes.append("add adjectival + to %s" % name)
                        pagemsg("Replacing %s with %s" % (origt, unicode(t)))
                    existing_fem = getparam(t, "f")
                    if existing_fem:
                        if new_fem != existing_fem:
                            pagemsg(
                                "WARNING: New feminine %s different from existing feminine %s, not changing: %s"
                                % (new_fem, existing_fem, unicode(t)))
                    else:
                        origt = unicode(t)
                        t.add("f", new_fem)
                        notes.append("add feminine %s to %s" % (new_fem, name))
                        pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    return unicode(parsed), notes


# Script driver: parse arguments, derive the start/end bounds and run
# process_page with edit=True.
# default_cats presumably makes "Russian surnames" the default page source --
# confirm against blib.
parser = blib.create_argparser(
    "Add feminines to Russian proper names", include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
    args, start, end, process_page,
    edit=True,
    default_cats=["Russian surnames"])
                if unicode(t.name) in [
                        "bor", "borrowing"
                ] and (getparam(t, "lang") == "ru" or not getparam(t, "lang")
                       and getparam(t, "1") == "ru"):
                    found_borrowing = True
                    pagemsg("Already contains borrowing: %s" % m.group(0))

    if not found_borrowing:
        pagemsg("WARNING: Can't find proper borrowing template")

    return text, "Use {{inh}}/{{bor}} in Russian for terms inherited or borrowed"


# Script driver: parse arguments, derive the start/end bounds and run
# process_page with edit=True over the two default categories.
parser = blib.create_argparser("Use {{inh}} and {{bor}} where possible in Russian",
                               include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page,
                           edit=True,
                           default_cats=["Russian lemmas",
                                         "Russian non-lemma forms"])

# Afterwards, report the contents of borrowed_langs (populated elsewhere in
# the file), highest integer count first.
msg("")
msg("Processed borrowed languages:")
for lang, count in sorted(borrowed_langs.items(),
                          key=lambda entry: -int(entry[1])):
    msg("%s = %s" % (lang, count))
    pagemsg("Processing")

    head = None
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn == "be-decl-noun":
            t.name = "be-decl-noun\n"
            for i in [2, 4, 6, 8, 10, 12]:
                val = getparam(t, str(i)).strip()
                if val:
                    t.add(str(i), val + "\n", preserve_spacing=False)
        if origt != unicode(t):
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
            notes.append("format {{be-decl-noun}} using newlines")

    return unicode(parsed), notes


# Script driver: parse arguments, derive the start/end bounds and run
# process_page with edit=True.
# default_refs presumably restricts the default page set to pages referencing
# Template:be-decl-noun -- confirm against blib.
parser = blib.create_argparser(
    u"Format be-decl-noun using newlines", include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
    args, start, end, process_page,
    default_refs=["Template:be-decl-noun"],
    edit=True)
Beispiel #8
0
  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    if unicode(t.name) == "ru-ux":
      origt = unicode(t)
      if t.has("adj"):
        pagemsg("Removing adj=")
        notes.append("remove adj= from ru-ux")
        rmparam(t, "adj")
      if t.has("shto"):
        pagemsg("Removing shto=")
        notes.append("remove shto= from ru-ux")
        rmparam(t, "shto")
      newt = unicode(t)
      if origt != newt:
        pagemsg("Replaced %s with %s" % (origt, newt))

  return unicode(parsed), notes

# Script driver: parse arguments, derive the start/end bounds and run
# process_page with edit=True.
# default_refs presumably restricts the default page set to pages referencing
# Template:ru-ux -- confirm against blib.
parser = blib.create_argparser(
  "Remove adj= and shto= from ru-ux",
  include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
  args,
  start,
  end,
  process_page,
  edit=True,
  default_refs=["Template:ru-ux"])
    if "==Alternative forms==" in secbody:
        pagemsg("WARNING: Skipping page with 'Alternative forms' section")
        return

    parsed = blib.parse_text(secbody)
    for t in parsed.filter_templates():
        origt = unicode(t)
        tn = tname(t)
        if tn in ["compound", "affix", "af"] and getparam(
                t, "1") == "hu" and not getparam(t, "pos"):
            t.add("pos", "noun")
        if origt != unicode(t):
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
            notes.append("add pos=noun to {{%s|hu}}" % tn)
    sections[j] = unicode(parsed) + sectail
    text = "".join(sections)
    return text, notes


# Script driver: parse arguments, derive the start/end bounds and run
# process_page with edit=True.
# default_cats presumably makes "Hungarian compound words" the default page
# source -- confirm against blib.
parser = blib.create_argparser(
    u"Add pos=noun to Hungarian compound words", include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
    args, start, end, process_page,
    default_cats=["Hungarian compound words"],
    edit=True)
Beispiel #10
0
        if tn == "bg-IPA":
            if not getparam(t, "old"):
                continue
            pron = getparam(t, "1")
            if pron:
                pron = decompose_bulgarian(pron)
                pron = pron.replace(AC, SUB)
                pron = pron.replace(GR, AC)
                pron = pron.replace(SUB, GR)
                t.add("1", pron)
            rmparam(t, "old")
            notes.append(
                "convert {{bg-IPA}} pronunciation to new style (flip acute and grave) and remove old=1"
            )
        if unicode(t) != origt:
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))
    return parsed, notes


# Script driver: parse arguments, derive the start/end bounds and run
# process_page over the pages.
# default_refs presumably restricts the default page set to pages referencing
# Template:bg-IPA -- confirm against blib.
parser = blib.create_argparser("Fix {{bg-IPA}} to new format",
                               include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# edit is passed as the boolean True rather than the integer 1, matching every
# other do_pagefile_cats_refs call in this file; the two compare equal and are
# both truthy.
blib.do_pagefile_cats_refs(args,
                           start,
                           end,
                           process_page,
                           default_refs=["Template:bg-IPA"],
                           edit=True)
Beispiel #11
0
            if getparam(t, "3") == "superlative of":
                base_lemma = getparam(t, "4")
                rmparam(t, "head")
                rmparam(t, "4")
                rmparam(t, "3")
                t.add("1", lemma)
                t.add("2", base_lemma)
                blib.set_template_name(t, "la-adj-sup")
                pagemsg("Replaced %s with %s" % (origt, unicode(t)))
                notes.append("Use {{la-adj-sup}} instead of {{head|la|...}}")
            else:
                pagemsg(
                    "WARNING: Head template doesn't include base form: %s" %
                    unicode(t))

    return unicode(parsed), notes


# Script driver: parse arguments, derive the start/end bounds and run
# process_page with edit=True.
# default_cats presumably makes "Latin adjective superlative forms" the
# default page source -- confirm against blib.
parser = blib.create_argparser("Fix Latin superlatives formatted using {{head|la|...}}",
                               include_pagefile=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(
    args, start, end, process_page,
    default_cats=["Latin adjective superlative forms"],
    edit=True)
def process_page(page, index):
    """Record the page title under each of its leading prefixes.

    For every prefix length from 1 up to ``args.max_prefix_length`` that the
    title is long enough to provide, the full title is appended to
    ``prefixes_by_length[length][prefix]``.  Nothing is returned.

    ``prefixes_by_length`` is defined outside this function; it is presumably
    a nested mapping whose innermost values are lists -- TODO confirm.
    ``index`` is accepted but not used here.
    """
    pagetitle = unicode(page.title())
    # A title cannot supply a prefix longer than itself, so cap the range at
    # the title length instead of testing the length on every iteration.
    longest = min(len(pagetitle), args.max_prefix_length)
    for length in xrange(1, longest + 1):
        prefixes_by_length[length][pagetitle[:length]].append(pagetitle)


# Script driver: besides the options blib's parser provides, this script adds
# --max-prefix-length (an int, default 10).  process_page is then run over the
# pages without an edit= argument.
parser = blib.create_argparser(
    "Snarf Italian pronunciations for fixing", include_pagefile=True)
parser.add_argument(
    "--max-prefix-length", type=int, default=10,
    help="Maximum length of prefixes to check for")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

blib.do_pagefile_cats_refs(args, start, end, process_page)

# Report, for each prefix length, every collected prefix with its titles,
# ordered so that prefixes with the most titles come first.
for i in xrange(1, args.max_prefix_length + 1):
    max_prefixes = sorted(prefixes_by_length[i].iteritems(),
                          key=lambda entry: -len(entry[1]))
    msg("Prefix length = %s" % i)
    msg("------------------- begin -----------------------")
    for prefix, titles in max_prefixes:
        # The inner "% i" fills in the prefix column width, producing a
        # second format string that is then applied to the row values.
        msg(("%%5d %%%ds %%s" % i) % (len(titles), prefix, ",".join(titles)))
    msg("-------------------  end  -----------------------")