Example #1
0
import pywikibot, re, sys, codecs, argparse
from collections import defaultdict

import blib
from blib import getparam, rmparam, msg, errandmsg, site, tname


def process_text_on_page(index, pagetitle, text):
    """Report the level-2 section headers found in one page's text.

    The text is split on lines of the form ``==Name==`` (the name may not
    contain ``=``, so deeper headers such as ``===Noun===`` are not matched)
    and a single message listing the names, in page order and joined by
    commas, is emitted through ``msg``.

    Args:
        index: Value shown after "Page" in the message prefix.
        pagetitle: Title of the page; also passed to
            ``blib.page_should_be_ignored`` to decide whether to skip it.
        text: Full text of the page.

    Returns:
        None. Nothing is emitted for pages that should be ignored.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    if blib.page_should_be_ignored(pagetitle):
        return

    # The capturing group makes re.split() keep the header lines: pieces at
    # odd positions are headers, pieces at even positions are the text around
    # them. maxsplit/flags are passed by keyword because passing them
    # positionally is deprecated in recent Python 3 releases.
    sections = re.split("(^==[^=\n]+==\n)", text, maxsplit=0, flags=re.M)
    # Slice out the headers instead of indexing with xrange(), which does not
    # exist on Python 3. Every header piece matched the split pattern, so the
    # search below always succeeds.
    langs = [
        re.search("^==(.*)==$", header).group(1) for header in sections[1::2]
    ]
    pagemsg("Languages = %s" % ",".join(langs))


# Script driver: build the argument parser via blib, parse the command line,
# and convert the start/end arguments with blib.parse_start_end.
parser = blib.create_argparser("Find languages on pages")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Hand stdin to blib.parse_dump with process_text_on_page as the callback;
# the parsed start/end values are forwarded as startsort/endsort.
blib.parse_dump(sys.stdin, process_text_on_page, startsort=start, endsort=end)
Example #2
0
  for k in xrange(1, len(splitsections), 2):
    if splitsections[k] == "English":
      saw_english = True
    else:
      saw_langs.add(splitsections[k])
  if saw_english:
    english_pages[pagetitle] = saw_langs

def process_line(index, line):
  """Check one "Replacing ... with ..." log line for suspicious links.

  Lines that do not have the expected
  "Page N TITLE: Replacing FROM with TO in ... section in LANG" shape are
  skipped. Otherwise every {{m|...}}, {{l|...}} or {{term|...}} template in
  the replacement text is examined: if the text captured after the template's
  second pipe is a key of english_pages and LANG is not among the values
  stored for it, a "Possible false positive" message is emitted.

  index: value shown after "Page" in the emitted message prefix.
  line: one line of log output.
  Always returns None.
  """
  parsed = re.search("^Page [0-9]+ (.*?): Replacing (.*) with (.*) in .* section in (.*)$", line)
  if parsed is None:
    return
  pagetitle, fromtext, totext, lang = parsed.groups()

  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  # Collect the link targets first, then filter them with guard clauses.
  link_targets = [
    template.group(1)
    for template in re.finditer(r"\{\{(?:m|l|term)\|.*?\|(.*?)\}\}", totext)
  ]
  for linkpage in link_targets:
    if linkpage not in english_pages:
      continue
    if lang in english_pages[linkpage]:
      continue
    pagemsg("Possible false positive for [[%s]] in %s: %s" % (linkpage, lang, fromtext))

# Script driver. --direcfile is marked required because it is opened
# unconditionally below; without it the script used to fail only after the
# whole dump on stdin had already been processed.
parser = blib.create_argparser("Check for likely false-positive links converted from raw links")
parser.add_argument("--direcfile", help="File of output from fix_links.py", required=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# First pass over the dump on stdin with find_english_pages as the callback
# (defined outside this view; presumably it fills english_pages, which
# process_line reads).
blib.parse_dump(sys.stdin, find_english_pages)

# Second pass: check each line of the fix_links.py output. The file is opened
# in a with-block so that it is closed even if processing a line raises.
with codecs.open(args.direcfile, "r", encoding="utf-8") as direcfile:
  for index, line in blib.iter_items(direcfile, start, end):
    process_line(index, line)
Example #3
0
                pn = pname(param)
                if pn not in ["1", "g", "g2", "g3", "g4"]:
                    pagemsg("WARNING: Extraneous param %s=: %s" %
                            (pn, unicode(t)))
    return None, None


def process_page(page, index, parsed):
    """Adapt a page object to the text-based ``process_text_on_page``.

    Args:
        page: Object providing ``title()`` and a ``text`` attribute; both are
            converted with ``unicode`` before being passed on.
        index: Forwarded unchanged to ``process_text_on_page``.
        parsed: Accepted but not used here.

    Returns:
        Whatever ``process_text_on_page`` returns.
    """
    return process_text_on_page(
        index,
        unicode(page.title()),
        unicode(page.text),
    )


# Script driver: build the argument parser via blib and add a --stdin switch
# that selects between the two processing modes below.
parser = blib.create_argparser(
    "Check for Latin non-lemma forms with bad params")
parser.add_argument("--stdin",
                    help="Read dump from stdin.",
                    action="store_true")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if args.stdin:
    # Dump mode: feed stdin to blib.parse_dump with the text-based callback.
    # NOTE(review): start/end are not forwarded in this branch, so they appear
    # to have no effect with --stdin -- confirm whether that is intended.
    blib.parse_dump(sys.stdin, process_text_on_page)
else:
    # Category mode: iterate the "Latin non-lemma forms" category restricted
    # by start/end and run process_page on each page through blib.do_edit,
    # passing along the save and verbose arguments.
    for i, page in blib.cat_articles("Latin non-lemma forms", start, end):
        blib.do_edit(page,
                     i,
                     process_page,
                     save=args.save,
                     verbose=args.verbose)