def process_page(page, index, parsed):
  """Rewrite class-8b ``ru-conj``/``ru-conj-old`` templates on one page.

  For every such template whose param 2 starts with "8b", param 4 is passed
  through ``rulib.make_unstressed_ru`` and the parameter list is rebuilt:
  numbered params 1-4 (plus 5 when non-empty) come first, followed by the
  non-numbered params other than lang/nocat/tr in the order encountered.

  The incoming ``parsed`` argument is not used; the page is re-parsed with
  ``blib.parse``.

  Returns a tuple ``(new page text, list of change notes)``.
  """
  pagetitle = unicode(page.title())

  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    origt = unicode(t)
    if unicode(t.name) in ["ru-conj", "ru-conj-old"]:
      arg1 = getparam(t, "1")
      arg2 = getparam(t, "2")
      if not arg2.startswith("8b"):
        # Not a class-8b verb; leave the template untouched.
        continue
      arg3 = getparam(t, "3")
      arg4 = getparam(t, "4")
      arg5 = getparam(t, "5")
      # A sixth numbered param is not expected for this class.
      assert not getparam(t, "6")

      # Warn about overrides that are present on the template.
      past_m = getparam(t, "past_m")
      if past_m:
        errmsg("WARNING: Has past_m=%s" % past_m)
      pap = getparam(t, "pap") or getparam(t, "past_adv_part")
      if pap:
        errmsg("WARNING: Has pap=%s" % pap)
      pap2 = getparam(t, "pap2") or getparam(t, "past_adv_part2")
      if pap2:
        errmsg("WARNING: Has pap2=%s" % pap2)

      arg4 = rulib.make_unstressed_ru(arg4)

      # Remember the named params that should survive the rebuild.
      kept_named = []
      for param in t.params:
        key = unicode(param.name)
        if re.search(r"^[0-9]+$", key) or key in ["lang", "nocat", "tr"]:
          continue
        kept_named.append((key, param.value))

      # Wipe the template and re-add params: numbered first, then named.
      del t.params[:]
      rebuilt = [("1", arg1), ("2", arg2), ("3", arg3), ("4", arg4)]
      if arg5:
        rebuilt.append(("5", arg5))
      for key, val in rebuilt + kept_named:
        t.add(key, val)
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))
      notes.append("rewrite class 8b verb to correspond to module changes")

  return unicode(parsed), notes
 def errandpagemsg(txt):
     """Send ``txt``, tagged with the page index and title, to both msg() and errmsg()."""
     tagged = "Page %s %s: %s" % (index, pagetitle, txt)
     msg(tagged)
     errmsg(tagged)
 def yield_lemma_non_lemma_page_titles():
   """Generate the title of each article in the categories from yield_cats("lemma,non-lemma")."""
   for cat in yield_cats("lemma,non-lemma"):
     progress = "Retrieving pages from %s ..." % cat
     msg(progress)
     errmsg(progress)
     for _, article in blib.cat_articles(cat, None, None):
       yield article.title()
  def yield_lemma_non_lemma_page_titles():
    """Yield page titles from every category returned by yield_cats("lemma,non-lemma")."""
    for cat in yield_cats("lemma,non-lemma"):
      # The same progress line goes to both output streams.
      status = "Retrieving pages from %s ..." % cat
      msg(status)
      errmsg(status)
      articles = blib.cat_articles(cat, None, None)
      for _, article in articles:
        yield article.title()

  # When requested, collect the titles of all lemma/non-lemma pages up front
  # so they can be skipped in the main loop below.
  if params.ignore_lemma_non_lemma:
    pages_to_ignore = set(yield_lemma_non_lemma_page_titles())
  else:
    pages_to_ignore = set()

  # Walk each category from yield_cats() and run remove_translit_one_page on
  # every article whose title is not in the ignore set.
  for category in yield_cats():
    msg("Processing category %s ..." % category)
    errmsg("Processing category %s ..." % category)
    for index, page in blib.cat_articles(category, startFrom, upTo):
      if page.title() not in pages_to_ignore:
        blib.do_edit(page, index, remove_translit_one_page, save=params.save,
            verbose=params.verbose)

# Command-line options for the translit-removal script.
pa = blib.init_argparser("Remove translit, sc= from hy, xcl, ka, el, grc templates")
# --langs: which languages to process (default: all).
pa.add_argument("--langs", default="all",
    help="Languages to do, a comma-separated list or 'all'")
# --cattype: which category types to examine (default: all).
pa.add_argument("--cattype", default="all",
    help="""Categories to examine ('all' or comma-separated list of
'translit', 'lemma', 'non-lemma'; default 'all')""")
# --ignore-lemma-non-lemma: boolean flag, off unless given.
pa.add_argument("--ignore-lemma-non-lemma", action="store_true",
    help="""Ignore lemma and non-lemma pages (useful with '--cattype translit').""")
# --do-head: boolean flag, off unless given.
pa.add_argument("--do-head", action="store_true",
    help="""Remove tr= in {{head|..}}""")
Example #5
0
 def error(text):
     """Report a fatal problem with the current input line and abort.

     Writes the offending ``line`` (read from the surrounding scope) and
     ``text`` via errmsg(), then raises AssertionError carrying ``text``.
     """
     errmsg("ERROR: Processing line: %s" % line)
     errmsg("ERROR: %s" % text)
     # Raise explicitly instead of `assert False`: assert statements are
     # stripped under `python -O`, which would let this function return
     # silently and processing continue after a reported error.
     raise AssertionError(text)
def fatal(line, text):
    """Report ``text`` for ``line`` at ``peeker.lineno`` via errmsg(), then raise ValueError."""
    where = "ERROR: Processing line %s: %s" % (peeker.lineno, line)
    errmsg(where)
    what = "ERROR: %s" % text
    errmsg(what)
    raise ValueError
Example #7
0
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

import re, sys, codecs, argparse

from blib import msg, errmsg
import rulib

# Scan a directive file for "Would save with comment" lines and print the
# accent-stripped lemma named in each one, sorted and de-duplicated.
parser = argparse.ArgumentParser(
    description="Find lemmas which would have forms saved.")
parser.add_argument('--direcfile', help="File containing directives.")
args = parser.parse_args()

lemmas = set()

# The context manager guarantees the directive file is closed after reading.
with codecs.open(args.direcfile, "r", "utf-8") as direcfile:
    for line in direcfile:
        line = line.strip()
        if "Would save with comment" in line:
            # Raw string so `\(` is unambiguously a regex escape. Group 1 is
            # the lemma: the text after " of " / " dictionary form " up to
            # the first delimiter (comma, " after", " before", " (add",
            # " (modify", " (update") or end of line.
            m = re.search(
                r"Would save with comment.* (?:of|dictionary form) (.*?)(,| after| before| \(add| \(modify| \(update|$)",
                line)
            if not m:
                errmsg("WARNING: Unable to parse line: %s" % line)
            else:
                lemmas.add(rulib.remove_accents(m.group(1)))
for lemma in sorted(lemmas):
    # A parenthesised single argument prints identically under the Python 2
    # print statement.
    print(lemma.encode('utf-8'))
 def err(text):
     """Report ``text`` via errmsg(), prefixed with the current ``lineno``."""
     prefixed = "Line %s: %s" % (lineno, text)
     errmsg(prefixed)
Example #9
0
 def errandpagemsg_with_contents(txt):
     """Log ``txt`` through pagemsg_with_contents() and also to errmsg() with page and contents title."""
     pagemsg_with_contents(txt)
     fields = (index, pagetitle, contents_title, txt)
     errmsg("Page %s %s: %s: %s" % fields)
Example #10
0
 def errandpagemsg_with_spelling(txt):
     """Log ``txt`` through pagemsg_with_spelling() and also to errmsg() with page and spelling."""
     pagemsg_with_spelling(txt)
     fields = (index, pagetitle, spelling, txt)
     errmsg("Page %s %s: %s: %s" % fields)
 def yield_lemma_non_lemma_page_titles():
     """Generate the title of each article in the categories from yield_cats("lemma,non-lemma")."""
     for cat in yield_cats("lemma,non-lemma"):
         progress = "Retrieving pages from %s ..." % cat
         msg(progress)
         errmsg(progress)
         for _, article in blib.cat_articles(cat, None, None):
             yield article.title()
    def yield_lemma_non_lemma_page_titles():
        """Yield page titles from every category returned by yield_cats("lemma,non-lemma")."""
        for cat in yield_cats("lemma,non-lemma"):
            # The same progress line goes to both output streams.
            status = "Retrieving pages from %s ..." % cat
            msg(status)
            errmsg(status)
            articles = blib.cat_articles(cat, None, None)
            for _, article in articles:
                yield article.title()

    # When requested, collect the titles of all lemma/non-lemma pages up
    # front so they can be skipped in the main loop below.
    if params.ignore_lemma_non_lemma:
        pages_to_ignore = set(yield_lemma_non_lemma_page_titles())
    else:
        pages_to_ignore = set()

    # Walk each category from yield_cats() and run remove_translit_one_page
    # on every article whose title is not in the ignore set.
    for category in yield_cats():
        msg("Processing category %s ..." % category)
        errmsg("Processing category %s ..." % category)
        for index, page in blib.cat_articles(category, startFrom, upTo):
            if page.title() not in pages_to_ignore:
                blib.do_edit(page,
                             index,
                             remove_translit_one_page,
                             save=params.save,
                             verbose=params.verbose)


# Command-line options for the translit-removal script.
pa = blib.init_argparser(
    "Remove translit, sc= from hy, xcl, ka, el, grc templates")
# --langs: which languages to process (default: all).
pa.add_argument("--langs",
                default="all",
                help="Languages to do, a comma-separated list or 'all'")
pa.add_argument("--cattype",
Example #13
0
      newval = re.sub("^#\* #\* ", "#* ", subsections[j], 0, re.M)
      if newval != subsections[j]:
        notes.append("remove double #* prefix")
        pagemsg("Removed double #* prefix")
      subsections[j] = newval
  newtext = "".join(subsections)

  if text != newtext:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, newtext))
    assert notes
    comment = "; ".join(blib.group_notes(notes))
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = newtext
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

if __name__ == "__main__":
  # Script entry point: run process_page over every page that references one
  # of the templates listed in replace_templates.
  parser = blib.create_argparser("Fix old cite/quote/reference templates")
  args = parser.parse_args()
  start, end = blib.parse_start_end(args.start, args.end)

  for template in replace_templates:
    # Announce the template on both output streams.
    status_line = "Processing references to Template:%s" % template
    msg(status_line)
    errmsg(status_line)
    referring_pages = blib.references(
        "Template:%s" % template, start, end, includelinks=True)
    for i, page in referring_pages:
      process_page(i, page, args.save, args.verbose)
            changed = origt != unicode(t)
            if changed:
                notes.append("quote-poem -> quote-book with fixed params")

        if changed:
            pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    return parsed, notes


# Command-line driver: apply process_page (via blib.do_edit) to every page
# referencing one of the quote templates listed below.
parser = blib.create_argparser(
    "quote-poem -> quote-book with changed params; "
    "quote-magazine/quote-news -> quote-journal; "
    "quote-Don Quixote -> RQ:Don Quixote")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for template in ("quote-poem", "quote-magazine", "quote-news",
                 "quote-Don Quixote"):
    # Announce the template on both output streams.
    status_line = "Processing references to Template:%s" % template
    msg(status_line)
    errmsg(status_line)
    referring_pages = blib.references(
        "Template:%s" % template, start, end, includelinks=True)
    for i, page in referring_pages:
        blib.do_edit(page, i, process_page,
                     save=args.save, verbose=args.verbose)
 def errpagemsg(txt):
   """Send ``txt``, tagged with the page index and title, to errmsg()."""
   tagged = "Page %s %s: %s" % (index, pagetitle, txt)
   errmsg(tagged)
Example #16
0
# Emit a wikitable listing each canonical template and its aliases together
# with the number of pages referencing each one.

# Optional namespace restriction, decoded from UTF-8 when supplied.
ref_namespaces = (args.ref_namespaces.decode("utf-8")
                  if args.ref_namespaces else None)

# Each input line is "canonical,alias1,alias2,...". The context manager
# guarantees the file handle is closed once the lines have been read.
with codecs.open(args.tempfile, "r", "utf-8") as tempfile_handle:
    lines = [x.strip() for x in tempfile_handle]

msg('{|class="wikitable"')
msg("! Aliased template !! Canonical template !! #Uses%s%s" %
    (" !! Refs" if args.include_refs else "",
     " !! Suggested disposition" if args.include_disposition else ""))
for ref_and_aliases in lines:
    split_refs = ref_and_aliases.split(",")
    mainref = "Template:%s" % split_refs[0]
    aliases = split_refs[1:]
    # The canonical template is paired with None; each alias is paired with
    # the canonical template it points to.
    refs = [(mainref, None)]
    for alias in aliases:
        refs.append(("Template:%s" % alias, mainref))
    for alias, mainref in refs:
        errmsg("Processing references to: %s" % alias)
        template_refs = list(
            blib.references(alias, start, end, namespaces=ref_namespaces))
        num_refs = len(template_refs)

        # The canonical row (mainref is None) shows its own name in bold in
        # both of the first two columns.
        if mainref:
            alias_cell = "[[%s]]" % alias
            canonical_cell = "[[%s]]" % mainref
        else:
            alias_cell = "'''[[%s]]'''" % alias
            canonical_cell = "'''[[%s]]'''" % alias
        if args.include_refs:
            refs_cell = " || %s" % ", ".join(
                "[[%s]]" % unicode(ref.title()) for i, ref in template_refs)
        else:
            refs_cell = ""
        disposition_cell = " || ?" if args.include_disposition else ""

        msg("|-")
        msg("| %s || %s || %s%s%s" %
            (alias_cell, canonical_cell, num_refs, refs_cell,
             disposition_cell))
msg("|}")
Example #17
0
 def error(text):
   """Report a fatal problem with the current input line and abort.

   Writes the offending ``line`` (read from the surrounding scope) and
   ``text`` via errmsg(), then raises AssertionError carrying ``text``.
   """
   errmsg("ERROR: Processing line: %s" % line)
   errmsg("ERROR: %s" % text)
   # Raise explicitly instead of `assert False`: assert statements are
   # stripped under `python -O`, which would let this function return
   # silently and processing continue after a reported error.
   raise AssertionError(text)