예제 #1
0
def infer_one_page_decls(page, index, text):
  """Run infer_one_page_decls_1() on one page, trapping StandardError.

  Returns whatever infer_one_page_decls_1(page, index, text) returns. If that
  call raises StandardError, a warning carrying the index, the page title and
  the exception repr is logged through msg(), the traceback is printed to
  stdout, and (None, None) is returned instead.
  """
  try:
    result = infer_one_page_decls_1(page, index, text)
  except StandardError as exc:
    pagetitle = unicode(page.title())
    msg("%s %s: WARNING: Got an error: %s" % (index, pagetitle, repr(exc)))
    traceback.print_exc(file=sys.stdout)
    result = None, None
  return result
예제 #2
0
def rewrite_pages(refrom, reto, refs, cat, pages, pagefile, pagetitle_sub,
    comment, filter_pages, save, verbose, startFrom, upTo):
  """Apply a list of regex replacements to a set of pages.

  refrom/reto are parallel sequences of regex patterns and replacements; each
  pair is applied in order with re.sub(). If pagetitle_sub is set, every
  occurrence of it in a pattern is replaced by the regex-escaped page title,
  and in a replacement by the literal page title.

  The pages to edit come from the first non-empty source among: `pages`
  (titles), `pagefile` (UTF-8 file with one title per line), `refs`
  (references to a page, via blib.references) and finally the category `cat`.
  Titles not matching the regex `filter_pages` (when given) are skipped.
  `comment` is the edit summary; when empty, one is built from the pattern
  pairs. save/verbose are handed to blib.do_edit(); startFrom/upTo are handed
  to the blib page iterators.
  """
  def rewrite_one_page(page, index, text):
    text = unicode(text)
    text = reorder_shadda(text)
    # Materialized because the pairs are walked twice (substitution below and
    # the default change log).
    zipped_fromto = list(zip(refrom, reto))
    # The title does not change between pattern pairs, so fetch it once.
    pagetitle = unicode(page.title()) if pagetitle_sub else None
    for fromval, toval in zipped_fromto:
      if pagetitle_sub:
        fromval = fromval.replace(pagetitle_sub, re.escape(pagetitle))
        toval = toval.replace(pagetitle_sub, pagetitle)
      text = re.sub(fromval, toval, text)
    return text, comment or "replace %s" % (", ".join("%s -> %s" % (f, t) for f, t in zipped_fromto))

  if pages:
    pages = ((pywikibot.Page(blib.site, page), index) for page, index in blib.iter_pages(pages, startFrom, upTo))
  elif pagefile:
    # Read the whole title list up front and close the file right away
    # instead of leaking the handle for the duration of the run.
    with codecs.open(pagefile, "r", "utf-8") as titlefile:
      lines = [x.strip() for x in titlefile]
    pages = ((pywikibot.Page(blib.site, page), index) for page, index in blib.iter_pages(lines, startFrom, upTo))
  elif refs:
    pages = blib.references(refs, startFrom, upTo, includelinks=True)
  else:
    pages = blib.cat_articles(cat, startFrom, upTo)
  for page, index in pages:
    pagetitle = unicode(page.title())
    if filter_pages and not re.search(filter_pages, pagetitle):
      blib.msg("Skipping %s because doesn't match --filter-pages regex %s" %
          (pagetitle, filter_pages))
    else:
      if verbose:
        blib.msg("Processing %s" % pagetitle)
      blib.do_edit(page, index, rewrite_one_page, save=save, verbose=verbose)
예제 #3
0
 def do_one_page_verb(page, index, text):
   """Strip i3rab from the vn= parameter of every {{ar-conj}} template.

   `text` is the parsed page (it must provide filter_templates()). For each
   ar-conj template the vn= value is split on ASCII or Arabic commas, each
   verbal noun is passed through remove_i3rab(), and the results are re-joined
   with ASCII commas. A trailing "?" (uncertain verbal noun) is set aside
   during processing and re-appended afterwards.

   Returns (text, changelog); the changelog lists the verbs whose vn= value
   actually changed.
   """
   pagename = page.title()
   verbcount = 0
   verbids = []
   for template in text.filter_templates():
     if template.name == "ar-conj":
       verbcount += 1
       origvalue = getparam(template, "vn")
       uncertain = origvalue.endswith("?")
       vnvalue = origvalue[:-1] if uncertain else origvalue
       if uncertain:
         msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
           index, pagename))
       if not vnvalue:
         continue
       form = getparam(template, "1")
       verbid = "#%s form %s" % (verbcount, form)
       if re.match("^[1I](-|$)", form):
         # Form I verbs are further identified by their vowel parameters.
         verbid += " (%s,%s)" % (getparam(template, "2"), getparam(template, "3"))
       newvn = ",".join(remove_i3rab(pagename, index, verbid, vn)
                        for vn in re.split(u"[,،]", vnvalue))
       if uncertain:
         newvn += "?"
       # Compare against the untouched parameter value. Comparing against the
       # value with "?" stripped would flag every uncertain verbal noun as
       # changed even when nothing was removed.
       if newvn != origvalue:
         msg("Page %s %s: Verb %s, replacing %s with %s" % (
           index, pagename, verbid, origvalue, newvn))
         addparam(template, "vn", newvn)
         verbids.append(verbid)
   return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
         ', '.join(verbids))
예제 #4
0
def investigate_possible_adj(index, adj_pagename, adv, adv_defns):
    """Print the definitions of an adverb next to those of a candidate adjective.

    Loads the page `adj_pagename`, locates its Latin section with
    lalib.find_latin_section() and, for every {{la-adj}} / {{la-part}}
    template found in its subsections, logs one line of the form
    "adv /// adj /// adverb definitions /// adjective definitions".

    Returns None in all cases; the only output is through msg().
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, adj_pagename, txt))

    pagemsg("Trying for adverb %s" % adv)
    page = pywikibot.Page(site, adj_pagename)
    if not page.exists():
        pagemsg("Doesn't exist for adverb %s" % adv)
        return

    text = unicode(page.text)

    retval = lalib.find_latin_section(text, pagemsg)
    if retval is None:
        return

    sections, j, secbody, sectail, has_non_latin = retval

    # The capturing group keeps the headers in the result, so subsection
    # bodies sit at the even indices starting from 2.
    subsections = re.split("(^===+[^=\n]+===+\n)", secbody, 0, re.M)

    for k in xrange(2, len(subsections), 2):
        parsed = blib.parse_text(subsections[k])
        for t in parsed.filter_templates():
            if tname(t) in ("la-adj", "la-part"):
                adj = lalib.la_get_headword_from_template(
                    t, adj_pagename, pagemsg)[0]
                adj_defns = lalib.find_defns(subsections[k])
                msg("%s /// %s /// %s /// %s" %
                    (adv, adj, ";".join(adv_defns), ";".join(adj_defns)))
예제 #5
0
def canon_param(pagetitle, index, template, lang, param, paramtr,
                translit_module):
    """Canonicalize one foreign-script parameter and its transliteration.

    `param` is either a single parameter name or a two-element list
    [source, destination]; the source name "page title" means the page title
    itself supplies the foreign text. `paramtr` names the transliteration
    parameter.

    Returns False when there is no foreign text, otherwise the `actions` value
    produced by do_canon_param(). The template is modified in place.
    """
    if isinstance(param, list):
        src_param, dest_param = param
    else:
        src_param = dest_param = param

    if src_param == "page title":
        foreign = pagetitle
    else:
        foreign = getparam(template, src_param)
    latin = getparam(template, paramtr)
    if not foreign:
        return False

    canonforeign, canonlatin, actions = do_canon_param(
        pagetitle, index, template, lang, src_param, dest_param, paramtr,
        foreign, latin, translit_module)

    before = "%s" % unicode(template)
    if canonforeign:
        add_param_handling_head(template, dest_param, canonforeign)
    # A canonlatin equal to True drops the transliteration parameter; any
    # other truthy value replaces it.
    if canonlatin == True:
        template.remove(paramtr)
    elif canonlatin:
        addparam(template, paramtr, canonlatin)
    if canonforeign or canonlatin:
        after = unicode(template)
        msg("Page %s %s: Replaced %s with %s" %
            (index, pagetitle, before, after))
    return actions
예제 #6
0
def process_page(index, page, verbose):
    """Log the Russian definitions found on one page.

    Does nothing further when rulib.check_for_alt_yo_terms() returns a true
    value for the page text. Otherwise finds the Russian section, extracts its
    definitions with rulib.find_defns() and logs
    "<title> <defn1>;<defn2>;...". A missing section or missing definitions
    are reported through the page logger. Always returns None.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    text = unicode(page.text)
    if rulib.check_for_alt_yo_terms(text, pagemsg):
        return

    section = blib.find_lang_section_from_text(text, "Russian", pagemsg)
    if section:
        defns = rulib.find_defns(section)
        if defns:
            msg("%s %s" % (pagetitle, ';'.join(defns)))
        else:
            pagemsg("Couldn't find definitions for %s" % pagetitle)
    else:
        pagemsg("Couldn't find Russian section for %s" % pagetitle)
예제 #7
0
def canon_param(pagetitle,
                index,
                template,
                param,
                paramtr,
                include_tempname_in_changelog=False):
    """Canonicalize one Arabic parameter and its transliteration.

    `param` is either a single parameter name or a two-element list
    [source, destination]; the source name "page title" means the page title
    itself supplies the Arabic text. `paramtr` names the transliteration
    parameter.

    Returns False when there is no Arabic text, otherwise the `actions` value
    produced by do_canon_param(). The template is modified in place.
    """
    if isinstance(param, list):
        src_param, dest_param = param
    else:
        src_param = dest_param = param

    if src_param == "page title":
        arabic = pagetitle
    else:
        arabic = getparam(template, src_param)
    latin = getparam(template, paramtr)
    if not arabic:
        return False

    canonarabic, canonlatin, actions = do_canon_param(
        pagetitle, index, template, src_param, dest_param, paramtr, arabic,
        latin, include_tempname_in_changelog)

    before = "%s" % unicode(template)
    if canonarabic:
        addparam(template, dest_param, canonarabic)
    # A canonlatin equal to True drops the transliteration parameter; any
    # other truthy value replaces it.
    if canonlatin == True:
        template.remove(paramtr)
    elif canonlatin:
        addparam(template, paramtr, canonlatin)
    if canonarabic or canonlatin:
        after = unicode(template)
        msg("Page %s %s: Replaced %s with %s" %
            (index, pagetitle, before, after))
    return actions
예제 #8
0
def canon_param(pagetitle, index, template, param, paramtr, translit_module,
    include_tempname_in_changelog=False):
  """Canonicalize one foreign-script parameter and its transliteration.

  `param` is either a single parameter name or a two-element list
  [source, destination]; the source name "page title" means the page title
  itself supplies the foreign text. `paramtr` names the transliteration
  parameter.

  Returns False when there is no foreign text, otherwise the `actions` value
  produced by do_canon_param(). The template is modified in place.
  """
  if isinstance(param, list):
    src_param, dest_param = param
  else:
    src_param = dest_param = param

  if src_param == "page title":
    foreign = pagetitle
  else:
    foreign = getparam(template, src_param)
  latin = getparam(template, paramtr)
  if not foreign:
    return False

  canonforeign, canonlatin, actions = do_canon_param(pagetitle, index,
      template, src_param, dest_param, paramtr, foreign, latin,
      translit_module, include_tempname_in_changelog)

  before = "%s" % unicode(template)
  if canonforeign:
    addparam(template, dest_param, canonforeign)
  # A canonlatin equal to True drops the transliteration parameter; any other
  # truthy value replaces it.
  if canonlatin == True:
    template.remove(paramtr)
  elif canonlatin:
    addparam(template, paramtr, canonlatin)
  if canonforeign or canonlatin:
    after = unicode(template)
    msg("Page %s %s: Replaced %s with %s" % (index, pagetitle, before, after))
  return actions
예제 #9
0
def vocalize_one_page_headwords(pagetitle, index, text):
    """Vocalize the head and inflection parameters of Arabic headword templates.

    `text` is the parsed page. Every template whose name is listed in
    arabiclib.arabic_non_verbal_headword_templates has its head processed by
    vocalize_head() and each inflection parameter chain processed by
    vocalize_param_chain(). The change log is logged unconditionally.

    Returns (text, changelog).
    """
    chain_params = (
        "pl", "plobl", "cpl", "cplobl", "fpl", "fplobl", "f",
        "fobl", "m", "mobl", "obl", "el", "sing", "coll", "d",
        "dobl", "pauc", "cons",
    )
    actions_taken = []
    for template in text.filter_templates():
        if template.name not in arabiclib.arabic_non_verbal_headword_templates:
            continue
        changed = []
        changed.extend(vocalize_head(pagetitle, index, template))
        for chain_param in chain_params:
            changed.extend(
                vocalize_param_chain(pagetitle, index, template, chain_param))
        if not changed:
            continue
        # Identify the template by name, plus its tr= value when it has one.
        if template.has("tr"):
            tempname = "%s %s" % (template.name, getparam(template, "tr"))
        else:
            tempname = template.name
        actions_taken.append("%s (%s)" % (', '.join(changed), tempname))
    changelog = "vocalize parameters: %s" % '; '.join(actions_taken)
    msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
    return text, changelog
예제 #10
0
def process_page_for_generate(page, index, verbs):
  """Log a generation directive for a multiword page title that begins with a verb.

  The title is split into an optional leading "no ", the verb (first word
  after that) and the remaining words. The verb must be a key of `verbs`; its
  value is the conjugation spec, with "*" standing for "<>". Each remaining
  word is passed through singularize(). The resulting line is logged through
  msg(); problems are logged as warnings. Always returns None.
  """
  pagename = unicode(page.title())
  def pagemsg(txt):
    msg("# Page %s %s: %s" % (index, pagename, txt))

  if " " not in pagename:
    pagemsg("WARNING: No space in page title")
    return

  if pagename.startswith("no "):
    prefix = "no "
    remainder = pagename[len(prefix):]
  else:
    prefix = ""
    remainder = pagename
  # With no further space the whole remainder is the verb and rest is "".
  verb, _, rest = remainder.partition(" ")

  if verb not in verbs:
    pagemsg("WARNING: Unrecognized verb '%s'" % verb)
    return

  linked_rest = " ".join(singularize(word) for word in rest.split(" "))
  spec = "<>" if verbs[verb] == "*" else verbs[verb]
  msg("%s%s%s %s" % (prefix, verb, spec, linked_rest))
예제 #11
0
def create_cat(cat, catargs, extratext=None):
    """Create a Belarusian category page if the category has any members.

    `cat` is the category name without the "Belarusian " prefix; "~" in it is
    replaced by the plural part of speech chosen by args.pos. `catargs` are
    appended as positional arguments of the {{be-<pos> cat}} template, and
    `extratext`, if given, follows on a new line.

    Nothing is done when the category is empty, or when the page exists and
    args.overwrite is not set. The page is saved only when args.save is set.
    """
    global args
    # args.pos -> (full part of speech, abbreviation used in the template name)
    pos_names = {
        "verb": ("verb", "verb"),
        "adj": ("adjective", "adj"),
        "noun": ("noun", "noun"),
    }
    if args.pos not in pos_names:
        assert False, "Invalid pos %s" % args.pos
    pos, shortpos = pos_names[args.pos]

    cat = "Belarusian " + cat.replace("~", "%ss" % pos)
    template_args = "".join("|" + arg for arg in catargs)
    text = "{{be-%s cat%s}}" % (shortpos, template_args)
    if extratext:
        text += "\n%s" % extratext

    members = list(blib.cat_articles(cat))
    if len(members) == 0:
        return

    cat = "Category:" + cat
    page = pywikibot.Page(site, cat)
    if not args.overwrite and page.exists():
        msg("Page %s already exists, not overwriting" % cat)
        return

    page.text = unicode(text)
    changelog = "Creating '%s' with text '%s'" % (cat, text)
    msg("Changelog = %s" % changelog)
    if args.save:
        blib.safe_page_save(page, changelog, errandmsg)
예제 #12
0
 def replace_raw_pos(m):
     """Regex substitution callback turning a raw link into an {{l}} bullet.

     Uses langnamecode, langname and pos_to_pos from the enclosing scope.
     Group 1 of the match becomes the linked term and group 2 is mapped
     through pos_to_pos to the pos= value. When langnamecode is empty a
     warning is logged and the matched text is returned unchanged.
     """
     matched = m.group(0)
     if langnamecode:
         return "\n* {{l|%s|%s|pos=%s}}" % (langnamecode, m.group(1),
                                            pos_to_pos[m.group(2)])
     msg("WARNING: Unable to parse langname %s when trying to replace raw link %s"
         % (langname, matched))
     return matched
예제 #13
0
 def do_one_page_verb(page, index, text):
   """Strip i3rab from the vn= parameter of every {{ar-conj}} template.

   `text` is the parsed page (it must provide filter_templates()). For each
   ar-conj template the vn= value is split on ASCII or Arabic commas, each
   verbal noun is passed through remove_i3rab(), and the results are re-joined
   with ASCII commas. A trailing "?" (uncertain verbal noun) is set aside
   during processing and re-appended afterwards.

   Returns (text, changelog); the changelog lists the verbs whose vn= value
   actually changed.
   """
   pagename = page.title()
   verbcount = 0
   verbids = []
   for template in text.filter_templates():
     if template.name == "ar-conj":
       verbcount += 1
       origvalue = getparam(template, "vn")
       uncertain = origvalue.endswith("?")
       vnvalue = origvalue[:-1] if uncertain else origvalue
       if uncertain:
         msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
           index, pagename))
       if not vnvalue:
         continue
       form = getparam(template, "1")
       verbid = "#%s form %s" % (verbcount, form)
       if re.match("^[1I](-|$)", form):
         # Form I verbs are further identified by their vowel parameters.
         verbid += " (%s,%s)" % (getparam(template, "2"), getparam(template, "3"))
       newvn = ",".join(remove_i3rab(pagename, index, verbid, vn)
                        for vn in re.split(u"[,،]", vnvalue))
       if uncertain:
         newvn += "?"
       # Compare against the untouched parameter value. Comparing against the
       # value with "?" stripped would flag every uncertain verbal noun as
       # changed even when nothing was removed.
       if newvn != origvalue:
         msg("Page %s %s: Verb %s, replacing %s with %s" % (
           index, pagename, verbid, origvalue, newvn))
         addparam(template, "vn", newvn)
         verbids.append(verbid)
   return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
         ', '.join(verbids))
def process_page(index, page, contents, origcontents, verbose, comment,
    lang_only, allow_page_creation):
  """Validate a prepared replacement text against the live page.

  `contents` is the new text and `origcontents` the text it was derived from.
  With `lang_only` set, both are the body of the single "==<lang_only>=="
  section rather than the whole page.

  Returns (new page text, comment) when the edit should proceed, or
  (None, None) when it is skipped: no change, page creation not permitted,
  language section missing or duplicated, or the live text no longer matches
  `origcontents`.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

  if contents == origcontents:
    pagemsg("Skipping contents for %s because no change" % pagetitle)
    return None, None
  if verbose:
    pagemsg("For [[%s]]:" % pagetitle)
    pagemsg("------- begin text --------")
    msg(contents.rstrip('\n'))
    msg("------- end text --------")
  # A page counts as existing only if there is also an original text to
  # compare it against.
  page_exists = page.exists() and origcontents is not None
  if not page_exists:
    if lang_only or not allow_page_creation:
      errandpagemsg("WARNING: Trying to create page when --lang-only or not --allow-page-creation")
      return None, None
  else:
    if lang_only:
      foundlang = False
      # Stays 0 when no section matches; index 0 is never a candidate because
      # the scan below starts at 2.
      sec_to_search = 0
      # The capturing group keeps the headers in the result: headers are at
      # odd indices, the corresponding section bodies at the following even
      # indices.
      sections = re.split("(^==[^=]*==\n)", page.text, 0, re.M)

      for j in xrange(2, len(sections), 2):
        if sections[j-1] == "==%s==\n" % lang_only:
          if foundlang:
            errandpagemsg("WARNING: Found multiple %s sections, skipping page" % lang_only)
            return None, None
          foundlang = True
          sec_to_search = j
      if not sec_to_search:
        errandpagemsg("WARNING: Couldn't find %s section, skipping page" % lang_only)
        return None, None
      # Separate the section body from its trailing newlines so the newlines
      # can be re-attached after the replacement.
      m = re.match(r"\A(.*?)(\n*)\Z", sections[sec_to_search], re.S)
      curtext, curnewlines = m.groups()
      curtext = unicodedata.normalize('NFC', curtext)
      supposed_curtext = unicodedata.normalize('NFC', origcontents.rstrip('\n'))
      if curtext != supposed_curtext:
        # NOTE(review): curtext is NFC-normalized here but contents is not;
        # confirm whether this comparison should normalize contents as well.
        if curtext == contents.rstrip('\n'):
          pagemsg("Section has already been changed to new text, not saving")
        else:
          errandpagemsg("WARNING: Text has changed from supposed original text, not saving")
        return None, None
      sections[sec_to_search] = contents.rstrip('\n') + curnewlines
      contents = "".join(sections)
    else:
      curtext = unicodedata.normalize('NFC', page.text.rstrip('\n'))
      supposed_curtext = unicodedata.normalize('NFC', origcontents.rstrip('\n'))
      if curtext != supposed_curtext:
        # NOTE(review): same normalization asymmetry as in the lang_only
        # branch above.
        if curtext == contents.rstrip('\n'):
          pagemsg("Page has already been changed to new text, not saving")
        else:
          errandpagemsg("WARNING: Text has changed from supposed original text, not saving")
        return None, None
  return contents, comment
예제 #15
0
def search_noconj(startFrom, upTo):
  """Report pages in "Arabic verbs" lacking an ar-verb or ar-conj template.

  Each page's parsed text is checked for the substrings "{{ar-verb" and
  "{{ar-conj"; one line is logged through msg() for each one that is absent.
  """
  verb_pages = blib.cat_articles(u"Arabic verbs", startFrom, upTo)
  for index, page in verb_pages:
    text = unicode(blib.parse(page))
    pagetitle = page.title()
    lacks_headword = "{{ar-verb" not in text
    lacks_conj = "{{ar-conj" not in text
    if lacks_headword:
      msg("* ar-verb not in {{l|ar|%s}}" % pagetitle)
    if lacks_conj:
      msg("* ar-conj not in {{l|ar|%s}}" % pagetitle)
예제 #16
0
def get_items(lines):
    """Yield (page index, page title, respelling) tuples parsed from log lines.

    A line is recognized when it has the form
    "Page <digits> <title>: <respelling> <text> <end>"; spaces around <text>
    are dropped. Any other line is reported through msg() and skipped.
    """
    item_pattern = "^Page ([0-9]*) (.*): <respelling> *(.*?) *<end>"
    for line in lines:
        found = re.search(item_pattern, line)
        if found:
            yield found.groups()
            continue
        # Not a warning, there will be several of these from output of
        # snarf_it_pron.py
        msg("Unrecognized line: %s" % line)
예제 #17
0
def infer_one_page_decls(page, index, text):
    """Run infer_one_page_decls_1() on one page, trapping StandardError.

    Returns whatever infer_one_page_decls_1(page, index, text) returns. If
    that call raises StandardError, a warning carrying the index, the page
    title and the exception repr is logged through msg(), the traceback is
    printed to stdout, and (None, None) is returned instead.
    """
    try:
        result = infer_one_page_decls_1(page, index, text)
    except StandardError as exc:
        pagetitle = unicode(page.title())
        msg("%s %s: WARNING: Got an error: %s" %
            (index, pagetitle, repr(exc)))
        traceback.print_exc(file=sys.stdout)
        result = None, None
    return result
예제 #18
0
def undo_greek_removal(save, verbose, direcfile, startFrom, upTo):
  """Undo earlier removals of a template parameter, driven by a log file.

  `direcfile` is a UTF-8 file whose lines look like
  "* [[page]]: Removed param=...: <nowiki>template text</nowiki>". For each
  parsed line the page is loaded and the template text without the removed
  parameter is replaced by the template text with it. An sc=polytonic
  parameter is dropped from both forms.

  save/verbose are handed to blib.do_edit(); startFrom/upTo are handed to
  blib.iter_pages(). Unparsable lines and missing pages are logged as
  warnings.
  """
  template_removals = []
  # Use a with-block so the directive file is closed once it has been read,
  # rather than leaking the handle.
  with codecs.open(direcfile, "r", encoding="utf-8") as direcfp:
    for line in direcfp:
      line = line.strip()
      m = re.match(r"\* \[\[(.*?)]]: Removed (.*?)=.*?: <nowiki>(.*?)</nowiki>$",
          line)
      if not m:
        msg("WARNING: Unable to parse line: [%s]" % line)
      else:
        template_removals.append(m.groups())

  for current, index in blib.iter_pages(template_removals, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, removed_param, template_text = current

    def undo_one_page_greek_removal(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      template = blib.parse_text(template_text).filter_templates()[0]
      orig_template = unicode(template)
      if getparam(template, "sc") == "polytonic":
        template.remove("sc")
      # to_template still carries the removed parameter; from_template is the
      # form currently expected on the page.
      to_template = unicode(template)
      param_value = getparam(template, removed_param)
      template.remove(removed_param)
      from_template = unicode(template)
      text = unicode(text)
      found_orig_template = orig_template in text
      newtext = text.replace(from_template, to_template)
      changelog = ""
      if newtext == text:
        if not found_orig_template:
          pagemsg("WARNING: Unable to locate 'from' template when undoing Greek param removal: %s"
              % from_template)
        else:
          pagemsg("Original template found, taking no action")
      else:
        if found_orig_template:
          pagemsg("WARNING: Undid removal, but original template %s already present!" %
              orig_template)
        # A length delta other than one replacement's worth means
        # str.replace() hit more than one occurrence.
        if len(newtext) - len(text) != len(to_template) - len(from_template):
          pagemsg("WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s" % (
            from_template, to_template))
        changelog = "Undid removal of %s=%s in %s" % (removed_param,
            param_value, to_template)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog

    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, undo_one_page_greek_removal, save=save,
          verbose=verbose)
예제 #19
0
def rewrite_one_page_ar_nisba(page, index, text):
  """Move head= to the first positional parameter in {{ar-nisba}} templates.

  `text` is the parsed page. A template with head= but no parameter 1 has the
  head value re-added as 1=, placed before whatever parameter is first after
  head= has been removed. Templates with plhead= are only reported.

  Returns (text, changelog).
  """
  for template in text.filter_templates():
    if not template.name == "ar-nisba":
      continue
    if template.has("head") and not template.has(1):
      head = unicode(template.get("head").value)
      template.remove("head")
      if len(template.params) > 0:
        first_param = template.params[0].name
      else:
        first_param = None
      addparam(template, "1", head, before=first_param)
    if template.has("plhead"):
      blib.msg("%s has plhead=" % page.title())
  return text, "ar-nisba: head= -> 1="
예제 #20
0
def parse_log_file(fn, startFrom, upTo):
  """Re-emit a log file, prefixing each "Page N ..." line with its page index.

  Entries come from yield_page_lines(fn) as (pageindex, pagename, lines)
  triples, filtered by blib.iter_pages() keyed on the page name. Lines of the
  form "Page <n> <rest>" are logged as "Page <pageindex>/<n> <rest>"; all
  other lines are logged unchanged.
  """
  entries = blib.iter_pages(yield_page_lines(fn), startFrom, upTo,
      key=lambda x:x[1])
  for (pageindex, pagename, lines), index in entries:
    for line in lines:
      found = re.match(r"^Page ([0-9/.-]+) (.*)$", line)
      if not found:
        msg(line)
        continue
      msg("Page %s/%s %s" % (pageindex, found.group(1), found.group(2)))
예제 #21
0
def test_infer():
  """Run infer_one_page_decls() over every entry of test_templates and log the results."""
  class Page:
    # Minimal stand-in providing only title().
    def title(self):
      return "test_infer"

  for pagetext in test_templates:
    parsed = blib.parse_text(pagetext)
    newtext, comment = infer_one_page_decls(Page(), 1, parsed)
    msg("newtext = %s" % unicode(newtext))
    msg("comment = %s" % comment)
예제 #22
0
def test_infer():
    """Run infer_one_page_decls() over every entry of test_templates and log the results."""
    class Page:
        # Minimal stand-in providing only title().
        def title(self):
            return "test_infer"

    for pagetext in test_templates:
        parsed = blib.parse_text(pagetext)
        newtext, comment = infer_one_page_decls(Page(), 1, parsed)
        msg("newtext = %s" % unicode(newtext))
        msg("comment = %s" % comment)
예제 #23
0
def process_page(index, page, save, verbose, nouns):
    """Log an etymology directive for the -ость noun derived from an adjective.

    The page title must end in -ый or -ий; the candidate noun is the title
    with that ending replaced by -ость and must be a member of `nouns`. Pages
    containing {{ru-adj-alt-ё}} are skipped. For each {{ru-adj}} template with
    a single head, a directive line is logged unless the noun has no Russian
    section or already contains "==Etymology". Always returns None.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    if re.search(u"[иы]й$", pagetitle) is None:
        pagemsg(u"Skipping adjective not in -ый or -ий")
        return

    noun = re.sub(u"[иы]й$", u"ость", pagetitle)
    if noun not in nouns:
        return

    text = unicode(page.text)
    parsed = blib.parse(page)

    # First pass: any alt-ё marker disqualifies the whole page.
    if any(unicode(t.name) == u"ru-adj-alt-ё"
           for t in parsed.filter_templates()):
        pagemsg(u"Skipping alt-ё adjective")
        return

    # Second pass: handle each adjective headword template.
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-adj":
            continue
        heads = blib.fetch_param_chain(t, "1", "head", pagetitle)
        if len(heads) > 1:
            pagemsg("Skipping adjective with multiple heads: %s" %
                    ",".join(heads))
            return
        tr = getparam(t, "tr")

        nounsection = blib.find_lang_section(noun, "Russian", pagemsg,
                                             errandpagemsg)
        if not nounsection:
            pagemsg("Couldn't find Russian section for %s" % noun)
        elif "==Etymology" in nounsection:
            pagemsg("Noun %s already has etymology" % noun)
        elif tr:
            msg(u"%s %s+tr1=%s+-ость no-etym" % (noun, heads[0], tr))
        else:
            msg(u"%s %s+-ость no-etym" % (noun, heads[0]))
예제 #24
0
def process_page(page, index, refrom, reto, pagetitle_sub, comment, lang_only,
                 warn_on_no_replacement, verbose):
    """Apply regex replacements to a page, optionally within one language section.

    refrom/reto are parallel sequences of regex patterns and replacements,
    applied in order with re.sub() in multiline mode. If pagetitle_sub is set,
    every occurrence of it in a pattern is replaced by the regex-escaped page
    title, and in a replacement by the literal page title.

    With `lang_only` set, only the body of the "==<lang_only>==" section is
    rewritten. The page is skipped (None is returned) when that section is
    missing or occurs more than once.

    Returns (new text, comment) otherwise; when `comment` is empty a summary
    is built from the pattern pairs. With `warn_on_no_replacement`, a warning
    is logged whenever the text ends up unchanged.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        blib.msg("Page %s %s: %s" % (index, pagetitle, txt))

    if verbose:
        blib.msg("Processing %s" % pagetitle)
    text = unicode(page.text)
    origtext = text
    text = reorder_shadda(text)
    # Materialized because the pairs are walked more than once (substitution
    # and the default change log).
    zipped_fromto = list(zip(refrom, reto))

    def replace_text(text):
        for fromval, toval in zipped_fromto:
            if pagetitle_sub:
                fromval = fromval.replace(pagetitle_sub, re.escape(pagetitle))
                toval = toval.replace(pagetitle_sub, pagetitle)
            text = re.sub(fromval, toval, text, 0, re.M)
        return text

    if not lang_only:
        text = replace_text(text)
    else:
        sec_to_replace = None
        # The capturing group keeps the headers in the result: headers are at
        # odd indices, the corresponding section bodies at the following even
        # indices.
        sections = re.split("(^==[^=]*==\n)", text, 0, re.M)

        # Scan every section rather than stopping at the first hit, so that a
        # duplicated language section is actually detected. Stopping early
        # made the multiple-section check below unreachable.
        for j in xrange(2, len(sections), 2):
            if sections[j - 1] == "==%s==\n" % lang_only:
                if sec_to_replace is not None:
                    pagemsg(
                        "WARNING: Found multiple %s sections, skipping page" %
                        lang_only)
                    if warn_on_no_replacement:
                        pagemsg("WARNING: No replacements made")
                    return
                sec_to_replace = j

        if sec_to_replace is None:
            if warn_on_no_replacement:
                pagemsg("WARNING: No replacements made")
            return
        sections[sec_to_replace] = replace_text(sections[sec_to_replace])
        text = "".join(sections)
    if warn_on_no_replacement and text == origtext:
        pagemsg("WARNING: No replacements made")
    return text, comment or "replace %s" % (", ".join(
        "%s -> %s" % (f, t) for f, t in zipped_fromto))
예제 #25
0
def process_page(index, page, save, verbose):
  """Disabled script body: logs a warning and returns immediately.

  Everything after the first `return` is unreachable and is kept only as the
  starting point for a future fix-up. That code inspects the past-tense
  overrides of {{ru-conj-7a}} / {{ru-conj-7b}} templates and reports whether
  they match the forms derived from the past stem.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable from here on (see docstring). ----
  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  # NOTE(review): nothing ever appends to notes, so the `assert notes` below
  # would fail if this code were re-enabled and a template changed.
  notes = []
  for t in parsed.filter_templates():
    origt = unicode(t)
    if unicode(t.name) in ["ru-conj-7a", "ru-conj-7b"]:
      past_stem = getparam(t, "4")
      # Whether the past stem ends in one of the listed vowel characters.
      vowel_end = re.search(u"[аэыоуяеиёю́]$", past_stem)
      past_m = getparam(t, "past_m")
      past_f = getparam(t, "past_f")
      past_n = getparam(t, "past_n")
      past_pl = getparam(t, "past_pl")
      if past_m or past_f or past_n or past_pl:
        upast_stem = ru.make_unstressed(past_stem)
        # Expected forms: the masculine keeps the stem as given; the others
        # use the unstressed stem plus a stressed ending.
        expected_past_m = past_stem + (u"л" if vowel_end else "")
        expected_past_f = upast_stem + u"ла́"
        expected_past_n = upast_stem + u"ло́"
        expected_past_pl = upast_stem + u"ли́"
        if ((not past_m or expected_past_m == past_m) and
            expected_past_f == past_f and
            expected_past_n == past_n and
            expected_past_pl == past_pl):
          msg("Would remove past overrides and add arg5=b")
        else:
          msg("WARNING: Remaining past overrides: past_m=%s, past_f=%s, past_n=%s, past_pl=%s, expected_past_m=%s, expected_past_f=%s, expected_past_n=%s, expected_past_pl=%s" %
              (past_m, past_f, past_n, past_pl, expected_past_m, expected_past_f, expected_past_n, expected_past_pl))
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)
예제 #26
0
def undo_ru_auto_accent(save, verbose, direcfile, startFrom, upTo):
  """Undo earlier template replacements, driven by a log file.

  `direcfile` is a UTF-8 file whose lines look like
  "Page N title: Replaced {{...}} with {{...}}". Only entries whose original
  template starts with {{ux|, {{usex|, {{ru-ux| or {{lang| are handled. For
  those, each occurrence of the replacement template on the page is put back
  to the original, provided the replacement template occurs at the start of
  a "#*" line, optionally followed by colons and spaces.

  save/verbose are handed to blib.do_edit(); startFrom/upTo are handed to
  blib.iter_pages(). Unparsable lines and missing pages are logged as
  warnings.
  """
  template_removals = []
  # Use a with-block so the directive file is closed once it has been read,
  # rather than leaking the handle.
  with codecs.open(direcfile, "r", encoding="utf-8") as direcfp:
    for line in direcfp:
      line = line.strip()
      m = re.search(r"^Page [0-9]+ (.*?): Replaced (\{\{.*?\}\}) with (\{\{.*?\}\})$",
          line)
      if not m:
        msg("WARNING: Unable to parse line: [%s]" % line)
      else:
        template_removals.append(m.groups())

  for current, index in blib.iter_pages(template_removals, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, orig_template, repl_template = current
    if not re.search(r"^\{\{(ux|usex|ru-ux|lang)\|", orig_template):
      continue
    def undo_one_page_ru_auto_accent(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      text = unicode(text)
      # Only act when the replacement template sits at the start of a "#*"
      # line.
      if not re.search("^#\*:* *%s" % re.escape(repl_template), text, re.M):
        return None, ""
      found_orig_template = orig_template in text
      newtext = text.replace(repl_template, orig_template)
      changelog = ""
      if newtext == text:
        if not found_orig_template:
          pagemsg("WARNING: Unable to locate 'repl' template when undoing Russian auto-accenting: %s"
              % repl_template)
        else:
          pagemsg("Original template found, taking no action")
      else:
        pagemsg("Replaced %s with %s" % (repl_template, orig_template))
        if found_orig_template:
          pagemsg("WARNING: Undid replacement, but original template %s already present!" %
              orig_template)
        # A length delta other than one replacement's worth means
        # str.replace() hit more than one occurrence.
        if len(newtext) - len(text) != len(orig_template) - len(repl_template):
          pagemsg("WARNING: Length mismatch when undoing Russian auto-accenting, may have matched multiple templates: orig=%s, repl=%s" % (
            orig_template, repl_template))
        changelog = "Undid auto-accenting (per Wikitiki89) of %s" % (orig_template)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog

    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, undo_one_page_ru_auto_accent, save=save,
          verbose=verbose)
예제 #27
0
def correct_one_page_link_formatting(page, index, text):
  """Fold a trailing transliteration and/or gender into {{l|ar|...}} links.

  Looks for {{l|ar|...}} optionally followed by a parenthesized
  transliteration (possibly quoted or wrapped in {{IPAchar|...}}) and/or a
  {{g|...}} template, and rewrites the match as a single {{l|ar|...}} with
  tr= and g= parameters. Links that already contain "|tr=" are skipped.

  Returns (new text, changelog).
  """
  text = unicode(text)
  pagetitle = page.title()
  linkschanged = []
  link_pattern = r"\{\{l\|ar\|([^}]*?)\}\} *(?:'*(?:(?:\{\{IPAchar\|)?\(([^{})]*?)\)(?:\}\})?)'*)? *(?:\{\{g\|(.*?)\}\})?"
  # The matches are computed over the text as it was on entry; replacements
  # are applied one at a time to the evolving text.
  for m in re.finditer(link_pattern, text):
    translit = m.group(2)
    genders = m.group(3)
    if not (translit or genders):
      continue
    msg("On page %s, found match: %s" % (pagetitle, m.group(0)))
    if "|tr=" in m.group(1):
      msg("Skipping because translit already present")
      continue
    if not genders:
      gender = ""
    elif genders == "m|f":
      gender = "|g=m|g2=f"
    else:
      gender = "|g=%s" % genders
    tr = "|tr=%s" % translit if translit else ""
    repl = "{{l|ar|%s%s%s}}" % (m.group(1), tr, gender)
    msg("Replacing\n%s\nwith\n%s" % (m.group(0), repl))
    newtext = text.replace(m.group(0), repl, 1)
    if newtext == text:
      msg("WARNING: Unable to do replacement")
      continue
    text = newtext
    linkschanged.append(m.group(1))
  return text, "incorporated translit/gender into links: %s" % ', '.join(linkschanged)
예제 #28
0
def rewrite_one_page_ar_nisba(page, index, text):
    """Move head= to the first positional parameter in {{ar-nisba}} templates.

    `text` is the parsed page. A template with head= but no parameter 1 has
    the head value re-added as 1=, placed before whatever parameter is first
    after head= has been removed. Templates with plhead= are only reported.

    Returns (text, changelog).
    """
    for template in text.filter_templates():
        if not template.name == "ar-nisba":
            continue
        if template.has("head") and not template.has(1):
            head = unicode(template.get("head").value)
            template.remove("head")
            if len(template.params) > 0:
                first_param = template.params[0].name
            else:
                first_param = None
            addparam(template, "1", head, before=first_param)
        if template.has("plhead"):
            blib.msg("%s has plhead=" % page.title())
    return text, "ar-nisba: head= -> 1="
예제 #29
0
def vocalize_param(pagetitle, index, template, param, paramtr):
  """Vocalize one Arabic parameter based on its transliteration.

  Returns False when `param` is empty, the vocalized text when
  do_vocalize_param() produced one (the template is then updated in place
  and the change logged), and True in every other case.
  """
  arabic = getparam(template, param)
  latin = getparam(template, paramtr)
  if not arabic:
    return False
  if not latin:
    return True
  vocalized = do_vocalize_param(pagetitle, index, template, param, arabic, latin)
  if not vocalized:
    return True
  before = "%s" % unicode(template)
  addparam(template, param, vocalized)
  after = unicode(template)
  msg("Page %s %s: Replaced %s with %s" % (index, pagetitle, before, after))
  return vocalized
예제 #30
0
def read_existing_pages(filename):
    """Read a gzipped log and return a mapping of page title to its set of languages.

    Lines of the form "Page <index> <title>: Langs=<a>,<b>,..." populate the
    result as {title: set([a, b, ...])}. Lines matching
    "Page <digits> ...: WARNING: ..." are reported and skipped; any other line
    is reported as unrecognized. Undecodable bytes are replaced rather than
    raising.
    """
    pages_with_langs = {}
    # Use a with-block so the gzip handle is closed when reading finishes,
    # rather than leaking it.
    with gzip.open(filename, "rb") as gzfile:
        reader = codecs.getreader("utf-8")(gzfile, errors="replace")
        for line in reader:
            line = line.rstrip("\n")
            if re.search("^Page [0-9]+ .*: WARNING: .*", line):
                msg("Skipping warning: %s" % line)
                continue
            m = re.search("^Page [0-9-]+ (.*): Langs=(.*?)$", line)
            if not m:
                msg("WARNING: Unrecognized line: %s" % line)
            else:
                pages_with_langs[m.group(1)] = set(m.group(2).split(","))
    return pages_with_langs
def render_groups(groups):
    """Print a two-column "Derived terms" section for groups of aspect pairs.

    Each element of ``groups`` is an iterable of ``(perfective, imperfective)``
    string pairs.  Every group is sorted on its own, then all imperfectives
    and all perfectives are joined with newlines and emitted through ``msg``.
    """
    placeholder = "* (no equivalent)"

    def compare_aspect_pair(xpf, ximpf, ypf, yimpf):
        # Compare on the first combination in which neither side is the
        # "no equivalent" placeholder; with no such combination the pairs tie.
        x_pf_ok = xpf != placeholder
        x_impf_ok = ximpf != placeholder
        y_pf_ok = ypf != placeholder
        y_impf_ok = yimpf != placeholder
        if x_pf_ok and y_pf_ok:
            return cmp(xpf, ypf)
        if x_impf_ok and y_impf_ok:
            return cmp(ximpf, yimpf)
        if x_pf_ok and y_impf_ok:
            return cmp(xpf, yimpf)
        if x_impf_ok and y_pf_ok:
            return cmp(ximpf, ypf)
        return 0

    def sort_aspect_pair(x, y):
        (xpf, ximpf), (ypf, yimpf) = x, y
        strip = rulib.remove_accents
        # Accent-insensitive comparison comes first; the accented forms are
        # only consulted to break ties, which keeps the order deterministic
        # for words that differ only in stress placement.
        unaccented = compare_aspect_pair(strip(xpf), strip(ximpf),
                                         strip(ypf), strip(yimpf))
        return unaccented or compare_aspect_pair(xpf, ximpf, ypf, yimpf)

    pfs = []
    impfs = []
    for gr in groups:
        ordered = sorted(gr, cmp=sort_aspect_pair)
        pfs.extend(pf for pf, impf in ordered)
        impfs.extend(impf for pf, impf in ordered)

    imperfective_column = "\n".join(impfs)
    perfective_column = "\n".join(pfs)
    msg("""
====Derived terms====
{{top2}}
''imperfective''
%s
{{mid2}}
''perfective''
%s
{{bottom}}
""" % (imperfective_column, perfective_column))
예제 #32
0
def vocalize_param(pagetitle, index, template, param, paramtr):
    """Vocalize ``param`` of ``template`` from the Latin text in ``paramtr``.

    Returns:
        False if ``param`` has no value; the vocalized string if
        ``do_vocalize_param`` returned one (the template is then updated and
        the replacement logged); True otherwise.
    """
    arabic = getparam(template, param)
    latin = getparam(template, paramtr)
    if not arabic:
        return False

    vocalized = None
    if latin:
        vocalized = do_vocalize_param(pagetitle, index, template, param,
                                      arabic, latin)
    if not vocalized:
        # Either there is no transliteration or nothing could be vocalized.
        return True

    original_text = "%s" % unicode(template)
    addparam(template, param, vocalized)
    updated_text = unicode(template)
    msg("Page %s %s: Replaced %s with %s" %
        (index, pagetitle, original_text, updated_text))
    return vocalized
예제 #33
0
 def do_process_param(pagetitle, index, pagetext, template, templang, param, paramtr):
   """Run process_param() on one parameter, then drop a redundant sc=Arab.

   `pagetext` and `templang` are accepted but not used here.  When the
   template has sc=Arab, the parameter is removed, the change is logged, and
   a "remove <template>.sc=Arab" note is appended to a list result (or
   becomes the whole result when process_param() did not return a list).
   """
   result = process_param(pagetitle, index, template, param, paramtr,
       include_tempname_in_changelog=True)
   if getparam(template, "sc") != "Arab":
     return result
   msg("Page %s %s: %s.%s: Removing sc=Arab" % (index, pagetitle,
     template.name, "sc"))
   before = "%s" % unicode(template)
   template.remove("sc")
   msg("Page %s %s: Replaced %s with %s" %
       (index, pagetitle, before, unicode(template)))
   removal_note = ["remove %s.sc=Arab" % template.name]
   if not isinstance(result, list):
     return removal_note
   return result + removal_note
예제 #34
0
 def do_process_param(pagetitle, index, template, param, paramtr):
   """Process one parameter and strip sc=Arab from the template if present.

   Returns process_param()'s result; when sc=Arab was removed, a
   "remove <template>.sc=Arab" entry is added to that result if it is a
   list, otherwise the entry alone is returned in a one-element list.
   """
   result = process_param(pagetitle, index, template, param, paramtr,
       include_tempname_in_changelog=True)
   has_arab_script = getparam(template, "sc") == "Arab"
   if has_arab_script:
     msg("Page %s %s: %s.%s: Removing sc=Arab" % (index, pagetitle,
       template.name, "sc"))
     # Capture the pre-removal text so the log can show the diff.
     old_text = "%s" % unicode(template)
     template.remove("sc")
     new_text = unicode(template)
     msg("Page %s %s: Replaced %s with %s" %
         (index, pagetitle, old_text, new_text))
     note = ["remove %s.sc=Arab" % template.name]
     result = result + note if isinstance(result, list) else note
   return result
예제 #35
0
def search_iyya_noetym(startFrom, upTo):
  """Report "Arabic nouns" pages ending in -iyya that lack an etym template.

  Every page in the category is parsed.  For titles ending in the -iyya
  suffix, a message is logged when none of the recognised ar-etym-*
  templates is present; the message also notes whether a {{suffix}}
  template was seen.
  """
  etym_template_names = ["ar-etym-iyya", "ar-etym-nisba-a",
      "ar-etym-noun-nisba", "ar-etym-noun-nisba-linking"]
  for page, index in blib.cat_articles(u"Arabic nouns", startFrom, upTo):
    text = blib.parse(page)
    pagetitle = page.title()
    if not pagetitle.endswith(u"ية"):
      continue
    names = [t.name for t in text.filter_templates()]
    has_etym = any(name in etym_template_names for name in names)
    if has_etym:
      continue
    has_suffix = any(name == "suffix" for name in names)
    if has_suffix:
      note = " (has suffix template)"
    else:
      note = ""
    msg("Page %s %s: Ends with -iyya, no appropriate etym template%s" % (
      index, pagetitle, note))
예제 #36
0
def process_one_page_headwords(pagetitle, index, text):
  """Process the head and inflection parameters of Arabic headword templates.

  For each template whose name is in
  arabiclib.arabic_non_verbal_headword_templates, process_head() and then
  process_param_chain() for every inflection parameter are run and their
  returned actions collected.  The per-template actions are joined into a
  changelog, which is always logged.

  Returns the `text` object and the changelog string.
  """
  inflection_params = ["pl", "plobl", "cpl", "cplobl", "fpl", "fplobl", "f",
      "fobl", "m", "mobl", "obl", "el", "sing", "coll", "d", "dobl",
      "pauc", "cons"]
  actions = []
  for template in text.filter_templates():
    if template.name not in arabiclib.arabic_non_verbal_headword_templates:
      continue
    # NOTE(review): this value is never read; the lookup is kept only so the
    # sequence of calls matches the original.
    tr = getparam(template, "tr")
    thisactions = []
    thisactions.extend(process_head(pagetitle, index, template))
    for param in inflection_params:
      thisactions.extend(
          process_param_chain(pagetitle, index, template, param))
    if thisactions:
      actions.append("%s: %s" % (template.name, ', '.join(thisactions)))
  changelog = '; '.join(actions)
  msg("Change log for page %s = %s" % (pagetitle, changelog))
  return text, changelog
예제 #37
0
def process_one_page_headwords(pagetitle, index, text):
  """Collect the actions taken on every non-verbal Arabic headword template.

  Templates are selected by membership of their name in
  arabiclib.arabic_non_verbal_headword_templates.  Each selected template
  contributes "<name>: <action>, <action>, ..." when at least one action was
  returned by process_head() or process_param_chain().

  Returns (text, changelog); the changelog is logged even when empty.
  """
  def actions_for(template):
    # Head first, then each inflection parameter chain, in a fixed order.
    found = list(process_head(pagetitle, index, template))
    for param in ["pl", "plobl", "cpl", "cplobl", "fpl", "fplobl", "f",
        "fobl", "m", "mobl", "obl", "el", "sing", "coll", "d", "dobl",
        "pauc", "cons"]:
      found += process_param_chain(pagetitle, index, template, param)
    return found

  actions = []
  for template in text.filter_templates():
    if template.name in arabiclib.arabic_non_verbal_headword_templates:
      # NOTE(review): result unused; retained to keep the call sequence.
      tr = getparam(template, "tr")
      thisactions = actions_for(template)
      if len(thisactions) > 0:
        summary = ', '.join(thisactions)
        actions.append("%s: %s" % (template.name, summary))
  changelog = '; '.join(actions)
  msg("Change log for page %s = %s" % (pagetitle, changelog))
  return text, changelog
def render_groups(groups):
  """Log a "Derived terms" block listing imperfectives and perfectives.

  `groups` is an iterable of groups, each an iterable of
  (perfective, imperfective) string pairs.  Pairs are sorted within each
  group; group order is preserved.
  """
  def is_noequiv(x):
    return x == "* (no equivalent)"

  def compare_aspect_pair(xpf, ximpf, ypf, yimpf):
    # Try pf/pf, impf/impf, pf/impf, impf/pf in that order and compare on the
    # first combination where neither member is the placeholder.
    candidates = ((xpf, ypf), (ximpf, yimpf), (xpf, yimpf), (ximpf, ypf))
    for left, right in candidates:
      if not is_noequiv(left) and not is_noequiv(right):
        return cmp(left, right)
    return 0

  def sort_aspect_pair(x, y):
    xpf, ximpf = x
    ypf, yimpf = y
    # Primary key ignores accents; the accented spelling is the tie-breaker so
    # that words differing only in stress are ordered consistently.
    plain = compare_aspect_pair(ru.remove_accents(xpf),
      ru.remove_accents(ximpf), ru.remove_accents(ypf),
      ru.remove_accents(yimpf))
    if plain != 0:
      return plain
    return compare_aspect_pair(xpf, ximpf, ypf, yimpf)

  pfs = []
  impfs = []
  for gr in groups:
    for pf, impf in sorted(gr, cmp=sort_aspect_pair):
      pfs += [pf]
      impfs += [impf]

  msg("""
====Derived terms====
{{top2}}
''imperfective''
%s
{{mid2}}
''perfective''
%s
{{bottom2}}
""" % ("\n".join(impfs), "\n".join(pfs)))
예제 #39
0
def process_page(page, index):
  """Walk the template-namespace references to `page`.

  Always announces "Processing references" via errmsg().  Without
  --table-of-uses the same line also goes to msg() and every referring
  subpage is handed to process_subpage().  With --table-of-uses a single
  comma-separated line is printed: the page title followed by the titles of
  all referring pages, each with "Template:" removed.
  """
  global args
  pagetitle = unicode(page.title())
  errmsg("Page %s %s: %s" % (index, pagetitle, "Processing references"))
  if not args.table_of_uses:
    msg("Page %s %s: %s" % (index, pagetitle, "Processing references"))
  aliases = []
  referrers = blib.references(pagetitle, namespaces=[10],
      only_template_inclusion=False, filter_redirects=args.redirects_only)
  for i, subpage in referrers:
    aliases.append(unicode(subpage.title()))
    if not args.table_of_uses:
      process_subpage(page, index, subpage, i)
  if args.table_of_uses:
    stripped_title = pagetitle.replace("Template:", "")
    if aliases:
      alias_suffix = "," + ",".join(x.replace("Template:", "") for x in aliases)
    else:
      alias_suffix = ""
    msg("%s%s" % (stripped_title, alias_suffix))
예제 #40
0
 def parse_infls(infltext, tr):
   """Convert inflections written outside the headword into template params.

   `infltext` is split on commas; each piece must look like a {{lang|ar|...}}
   or {{l|ar|...}} link, an optional parenthesised transliteration and a
   {{g|...}} gender.  Genders f, p and f-p feed the f=, pl= and fpl=
   parameter series; anything else is warned about and dropped.

   Returns a string of "|name=value" fragments, starting with "|tr=..." when
   `tr` is non-empty.
   """
   # gender code -> (Arabic forms, transliterations or None)
   collected = {"f": ([], []), "p": ([], []), "f-p": ([], [])}
   for rawinfl in re.split(", *", infltext):
     if not rawinfl:
       continue
     infl = re.match("'*\{\{(?:lang|l)\|ar\|(.*?)\}\}'* *(?:(?:\{\{IPAchar\|)?\((.*?)\)(?:\}\})?)? *\{\{g\|(.*?)\}\}",
       rawinfl)
     if not infl:
       msg("WARNING: Unable to match infl-outside-head %s" % rawinfl)
       continue
     msg("Found infl outside head: %s" % infl.group(0))
     arabic_form, translit, gender = infl.group(1), infl.group(2), infl.group(3)
     if "|" in arabic_form:
       msg("WARNING: Found | in head, skipping: %s" % arabic_form)
       continue
     bucket = collected.get(gender)
     if bucket is None:
       msg("WARNING: Unrecognized inflection gender '%s'" % gender)
       continue
     bucket[0].append(arabic_form)
     bucket[1].append(translit)

   def handle_infls(infls, arabic, latin, argname):
     # The first value uses the bare name (f=, ftr=); later ones are numbered
     # from 2 (f2=, f2tr=).
     for pos, ar in enumerate(arabic):
       count = pos + 1
       arg = argname if count == 1 else "%s%s" % (argname, count)
       infls += "|%s=%s" % (arg, ar)
       tr_value = latin[pos]
       if tr_value is not None:
         larg = ("%str" % argname) if count == 1 else ("%s%str" % (argname, count))
         infls += "|%s=%s" % (larg, tr_value)
     return infls

   infls = ""
   if tr:
     infls += "|tr=%s" % tr
   for gender, argname in (("f", "f"), ("p", "pl"), ("f-p", "fpl")):
     forms, translits = collected[gender]
     infls = handle_infls(infls, forms, translits, argname)
   return infls
예제 #41
0
def search_iyya_noetym(startFrom, upTo):
    """Log "Arabic nouns" pages ending in -iyya with no matching etym template.

    Each page of the category is parsed; for titles with the -iyya ending,
    the templates are scanned for one of the ar-etym-* names and for
    {{suffix}}.  Pages lacking an etym template are reported, with a note
    when a suffix template was found.
    """
    wanted = [
        "ar-etym-iyya", "ar-etym-nisba-a",
        "ar-etym-noun-nisba", "ar-etym-noun-nisba-linking"
    ]
    for index, page in blib.cat_articles(u"Arabic nouns", startFrom, upTo):
        text = blib.parse(page)
        pagetitle = page.title()
        if not pagetitle.endswith(u"ية"):
            continue
        etym = False
        suffix = False
        for t in text.filter_templates():
            etym = etym or t.name in wanted
            suffix = suffix or t.name == "suffix"
        if etym:
            continue
        extra = " (has suffix template)" if suffix else ""
        msg("Page %s %s: Ends with -iyya, no appropriate etym template%s"
            % (index, pagetitle, extra))
예제 #42
0
def create_cat(cat, args, adj=False, verb=False):
    """Create a Russian category page holding the matching boilerplate template.

    ``~`` in ``cat`` is replaced by "verbs", "adjectives" or "nouns"
    (``verb`` takes precedence over ``adj``) and "Category:Russian " is
    prepended.  Noun and adjective categories pass ``args`` joined with "|"
    to their boilerplate template; verb categories ignore ``args``.

    An existing page is left untouched unless the module-level ``overwrite``
    flag is set, and the page is saved only when ``dosave`` is set.
    """
    if verb:
        part_of_speech = "verbs"
        text = "{{ruverbcatboiler}}"
    elif adj:
        part_of_speech = "adjectives"
        text = "{{ruadjcatboiler|%s}}" % "|".join(args)
    else:
        part_of_speech = "nouns"
        text = "{{runouncatboiler|%s}}" % "|".join(args)
    cat = "Category:Russian " + cat.replace("~", part_of_speech)

    page = pywikibot.Page(site, cat)
    already_there = page.exists() if not overwrite else False
    if already_there:
        msg("Page %s already exists, not overwriting" % cat)
        return
    page.text = unicode(text)
    changelog = "Creating '%s' with text '%s'" % (cat, text)
    msg("Changelog = %s" % changelog)
    if dosave:
        blib.safe_page_save(page, changelog, errandmsg)
예제 #43
0
def vocalize_one_page_headwords(pagetitle, index, text):
  """Vocalize head and inflection parameters of Arabic headword templates.

  Only templates named in arabiclib.arabic_non_verbal_headword_templates are
  touched.  For each, the parameters reported as changed by vocalize_head()
  and vocalize_param_chain() are listed, tagged with the template name (plus
  its tr= value when it has one).

  Returns (text, changelog); the changelog is always logged.
  """
  chained_params = ["pl", "plobl", "cpl", "cplobl", "fpl", "fplobl", "f",
      "fobl", "m", "mobl", "obl", "el", "sing", "coll", "d", "dobl",
      "pauc", "cons"]
  actions_taken = []
  for template in text.filter_templates():
    if template.name not in arabiclib.arabic_non_verbal_headword_templates:
      continue
    paramschanged = []
    paramschanged.extend(vocalize_head(pagetitle, index, template))
    for param in chained_params:
      paramschanged.extend(
          vocalize_param_chain(pagetitle, index, template, param))
    if not paramschanged:
      continue
    # Include tr= in the label when the template has one.
    if template.has("tr"):
      tempname = "%s %s" % (template.name, getparam(template, "tr"))
    else:
      tempname = template.name
    actions_taken.append("%s (%s)" % (', '.join(paramschanged), tempname))
  changelog = "vocalize parameters: %s" % '; '.join(actions_taken)
  msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
  return text, changelog
예제 #44
0
def process_page(index, page, contents, lang, verbose, comment):
    """Insert a new language section into a page in alphabetical position.

    Args:
        index: running index used only in log messages.
        page: page object exposing ``title()``, ``exists()`` and ``text``.
        contents: wikitext of the new section.
        lang: second-level header name of the new section.
        verbose: when true, log ``contents`` before doing anything else.
        comment: changelog string passed through unchanged.

    Returns:
        ``(new_text, comment)``.  For a page that does not exist,
        ``new_text`` is just ``contents``.  Otherwise the section is placed
        before the first existing second-level section whose name sorts
        after ``lang``, or appended when there is none, separated by
        ``----``.  Returns ``None`` (after logging a warning) when a
        malformed second-level header is met or a ``lang`` section already
        exists.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    if verbose:
        # Use the locally computed title; this block defines no `pagename`.
        pagemsg("For [[%s]]:" % pagetitle)
        pagemsg("------- begin text --------")
        msg(contents.rstrip('\n'))
        msg("------- end text --------")
    if not page.exists():
        return contents, comment

    insert_before = 0
    curtext = page.text
    # With a capturing group, odd indices hold the "==Header==" lines and
    # even indices the text that follows them.
    sections = re.split("(^==[^=]*==\n)", curtext, 0, re.M)

    for j in xrange(2, len(sections), 2):
        m = re.search(r"^==\s*(.*?)\s*==\n", sections[j - 1])
        if not m:
            errandpagemsg("WARNING: Saw bad second-level header: %s" %
                          sections[j - 1].strip())
            return
        foundlang = m.group(1)
        if foundlang == lang:
            errandpagemsg("WARNING: Already found %s section" % lang)
            return
        if foundlang > lang:
            # j - 1 is always >= 1, so 0 can serve as the "not found" marker.
            insert_before = j - 1
            break
    if insert_before == 0:
        # Add to the end
        newtext = curtext.rstrip("\n") + "\n\n----\n\n" + contents
        return newtext, comment
    # Insert the section as one list element (a bare string would be spliced
    # in character by character).
    sections[insert_before:insert_before] = [
        contents.rstrip("\n") + "\n\n----\n\n"
    ]
    return "".join(sections), comment
예제 #45
0
  def canonicalize_one_page_verb_form(page, index, text):
    """Canonicalize the verb-form parameter of every `tempname` template.

    `tempname` and `formarg` come from the enclosing scope.  A non-empty form
    is replaced by canonicalize_form(form).  One changelog entry is recorded
    per matching template; for forms starting with "1" or "I" the entry also
    shows the two parameters numbered formarg+1 and formarg+2.

    Returns (text, changelog); the changelog is logged only when at least one
    template matched.
    """
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    actions_taken = []

    for template in text.filter_templates():
      is_target = template.name == tempname
      if not is_target:
        continue
      before = unicode(template)
      form = getparam(template, formarg)
      if form:
        addparam(template, formarg, canonicalize_form(form))
      after = unicode(template)
      if before != after:
        msg("Replacing %s with %s" % (before, after))
      # The entry is built from the form as read before canonicalization.
      if re.match("^[1I](-|$)", form):
        next_one = getparam(template, str(1+int(formarg)))
        next_two = getparam(template, str(2+int(formarg)))
        entry = "form=%s (%s/%s)" % (form, next_one, next_two)
      else:
        entry = "form=%s" % form
      actions_taken.append(entry)
    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
        tempname, formarg, '; '.join(actions_taken))
    if actions_taken:
      msg("Change log = %s" % changelog)
    return text, changelog
예제 #46
0
def rewrite_one_page_verb_headword(page, index, text):
  """Convert form= in {{ar-verb}} to a canonicalized first positional param.

  When form= is non-empty, numbered parameters 10..1 are each renamed one
  higher, form= is removed and re-added (canonicalized) ahead of the first
  remaining parameter, then renamed to "1" with its key hidden.  One
  changelog entry is recorded per ar-verb template whether or not it changed.

  Returns (text, changelog); the changelog is logged only when at least one
  ar-verb template was seen.
  """
  pagetitle = page.title()
  msg("Processing page %s" % pagetitle)
  actions_taken = []

  for template in text.filter_templates():
    is_verb_headword = template.name in ["ar-verb"]
    if not is_verb_headword:
      continue
    before = unicode(template)
    form = getparam(template, "form")
    if form:
      # Renumber from the top down so a parameter is never renamed onto a
      # number that has not been moved yet; this keeps the original order.
      for pno in xrange(10, 0, -1):
        old_key = str(pno)
        if template.has(old_key):
          template.get(old_key).name = str(pno + 1)
      # Re-add form= at the front ...
      template.remove("form")
      canonical = canonicalize_form(form)
      if len(template.params) > 0:
        first_name = template.params[0].name
      else:
        first_name = None
      addparam(template, "form", canonical, before=first_name)
      # ... then rename it to 1= and hide the key.
      template.get("form").name = "1"
      template.get("1").showkey = False
    after = unicode(template)
    if before != after:
      msg("Replacing %s with %s" % (before, after))
    if re.match("^[1I](-|$)", form):
      entry = "form=%s (%s/%s)" % (form,
        getparam(template, "2"), getparam(template, "3"))
    else:
      entry = "form=%s" % form
    actions_taken.append(entry)
  changelog = "ar-verb: form= -> 1= and canonicalize to Roman numerals, move other params up: %s" % '; '.join(actions_taken)
  if actions_taken:
    msg("Change log = %s" % changelog)
  return text, changelog
예제 #47
0
 def process_param(pagetitle, index, pagetext, template, templang, param,
                   paramtr):
     """Canonicalize one parameter and remove sc= when it names a known script.

     ``lang``, ``translit_module``, ``script`` and ``show_template`` come from
     the enclosing scope; ``pagetext`` and ``templang`` are not used here.

     Returns canon_param()'s result.  When sc= was removed, a
     "remove sc=... in {{...}}" note is appended to that result, or returned
     alone in a list if the result was False.
     """
     result = canon_param(pagetitle, index, template, lang, param, paramtr,
                          translit_module)
     scvalue = getparam(template, "sc")
     if scvalue not in script:
         return result

     tname = unicode(template.name)
     # Compared with ``== False`` on purpose: an empty list is not False.
     if show_template and result == False:
         msg("Page %s %s: %s.%s: Processing %s" %
             (index, pagetitle, tname, "sc", unicode(template)))
     msg("Page %s %s: %s.%s: Removing sc=%s" %
         (index, pagetitle, tname, "sc", scvalue))
     before = "%s" % unicode(template)
     template.remove("sc")
     msg("Page %s %s: Replaced %s with %s" %
         (index, pagetitle, before, unicode(template)))
     removal_note = [
         "remove sc=%s in {{%s}}" %
         (scvalue, template_changelog_name(template, lang))
     ]
     if result == False:
         return removal_note
     return result + removal_note
예제 #48
0
def output_heads_seen(overall=False):
  """Log template counts, highest count first.

  Reads the module-level overall_head_count when `overall` is true and
  cat_head_count otherwise, printing a matching heading before the entries.
  """
  if overall:
    counts, heading = overall_head_count, "Overall templates seen:"
  else:
    counts, heading = cat_head_count, "Templates seen per category:"
  msg(heading)
  # Negating the count gives a descending order.
  by_count_desc = sorted(counts.items(), key=lambda entry: -entry[1])
  for head, count in by_count_desc:
    msg("  %s = %s" % (head, count))
예제 #49
0
 def fix_one_page_tool_place_noun(page, index, text):
   """Swap the cap= convention for lc= on every `template` occurrence.

   `template` (the template name) comes from the enclosing scope.  A matching
   template with a non-empty cap= loses that parameter; any other matching
   template gains lc=1.

   Returns (text, changelog); the changelog is logged unconditionally.
   """
   pagetitle = page.title()
   for t in text.filter_templates():
     is_target = t.name == template
     if not is_target:
       continue
     has_cap = getparam(t, "cap")
     if not has_cap:
       msg("Page %s %s: Template %s: Add lc=1" %
           (index, pagetitle, template))
       addparam(t, "lc", "1")
       continue
     msg("Page %s %s: Template %s: Remove cap=" %
         (index, pagetitle, template))
     t.remove("cap")
   changelog = "%s: If cap= is present, remove it, else add lc=" % template
   msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
   return text, changelog
예제 #50
0
def do_pages(createfn, iterfn=iter_pages):
    """Create the pages produced by ``iterfn(createfn)``.

    Each produced item is a ``(pagename, text, changelog)`` triple.  The wiki
    title is ``remove_diacritics(pagename)``.  In offline mode the text and
    changelog are only logged; otherwise a page that already exists is
    skipped with a warning and a missing one is created via ``blib.do_edit``.
    """
    candidates = iterfn(createfn)
    for current, index in blib.iter_pages(candidates, startFrom, upTo, key=lambda x: x[0]):
        pagename, text, changelog = current
        pagetitle = remove_diacritics(pagename)
        if params.offline:
            msg("Text for %s: [[%s]]" % (pagename, text))
            msg("Changelog = %s" % changelog)
            continue
        page = pywikibot.Page(site, pagetitle)
        if page.exists():
            msg("Page %s %s: WARNING, page already exists, skipping" % (index, pagename))
            continue

        # The edit callback ignores its arguments and returns the prepared
        # text and changelog of the current item.
        def save_text(page, index, parsed):
            return text, changelog

        blib.do_edit(page, index, save_text, save=params.save, verbose=params.verbose)
예제 #51
0
 def process_param(pagetitle, index, template, param, paramtr):
   """Canonicalize one parameter and drop sc=Arab from the template.

   Returns canon_param()'s result.  When sc=Arab was removed, a
   "remove <template>.sc=Arab" note is appended to that result, or returned
   alone in a list if the result was False.  `show_template` comes from the
   enclosing scope.
   """
   result = canon_param(pagetitle, index, template, param, paramtr,
       include_tempname_in_changelog=True)
   if getparam(template, "sc") != "Arab":
     return result
   tname = unicode(template.name)
   # `== False` is deliberate: an empty list must not count as False here.
   if show_template and result == False:
     msg("Page %s %s: %s.%s: Processing %s" % (index,
       pagetitle, tname, "sc", unicode(template)))
   msg("Page %s %s: %s.%s: Removing sc=Arab" % (index,
     pagetitle, tname, "sc"))
   before = "%s" % unicode(template)
   template.remove("sc")
   msg("Page %s %s: Replaced %s with %s" %
       (index, pagetitle, before, unicode(template)))
   removal_note = ["remove %s.sc=Arab" % tname]
   if result == False:
     return removal_note
   return result + removal_note
예제 #52
0
 def process_param(pagetitle, index, template, param, paramtr):
   """Canonicalize one parameter and remove sc= when its value is in `script`.

   `translit_module`, `script` and `show_template` come from the enclosing
   scope.  Returns canon_param()'s result, extended with (or, if it was
   False, replaced by) a one-element "remove <template>.sc=<value>" list when
   sc= was removed.
   """
   result = canon_param(pagetitle, index, template, param, paramtr,
       translit_module, include_tempname_in_changelog=True)
   scvalue = getparam(template, "sc")
   redundant_script = scvalue in script
   if redundant_script:
     tname = unicode(template.name)
     # False (not merely falsy) marks "nothing was processed", so the
     # template has not been shown in the log yet.
     if show_template and result == False:
       msg("Page %s %s: %s.%s: Processing %s" % (index,
         pagetitle, tname, "sc", unicode(template)))
     msg("Page %s %s: %s.%s: Removing sc=%s" % (index,
       pagetitle, tname, "sc", scvalue))
     old_text = "%s" % unicode(template)
     template.remove("sc")
     new_text = unicode(template)
     msg("Page %s %s: Replaced %s with %s" %
         (index, pagetitle, old_text, new_text))
     note = ["remove %s.sc=%s" % (tname, scvalue)]
     result = result + note if result != False else note
   return result
예제 #53
0
 def fix_one_page_smp(page, index, text):
   """Change pl=smp to pl=sp in ar-decl-* templates.

   The parameters pl, pl2, pl3, ... are visited until the first empty one.
   A value of "smp" becomes "sp" unless the template's first parameter (after
   reorder_shadda()) ends with TAM, in which case a warning is logged and the
   value is left alone.

   Returns (text, changelog); the changelog is logged unconditionally.
   """
   pagetitle = page.title()
   for t in text.filter_templates():
     head = reorder_shadda(getparam(t, "1"))
     if not t.name.startswith("ar-decl-"):
       continue
     param = "pl"
     next_number = 2
     while True:
       pl = getparam(t, param)
       if not pl:
         break
       if pl == "smp":
         if head.endswith(TAM):
           msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (
             index, pagetitle, param, head, t.name))
         else:
           msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (
             index, pagetitle, param, param, t.name))
           addparam(t, param, "sp")
       param = "pl%s" % next_number
       next_number += 1
   changelog = "Change pl=smp to pl=sp"
   msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
   return text, changelog
예제 #54
0
 def pagemsg(txt):
   """Log `txt` prefixed with the enclosing scope's page index and title."""
   line = "Page %s %s: %s" % (index, pagetitle, txt)
   msg(line)
예제 #55
0
          unicode(t.name), " (NEEDS REVIEW)" if fixed_plural_warning else ""))

  newtext = unicode(parsed)
  if newtext != text:
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = newtext
      blib.try_repeatedly(lambda: page.save(comment=comment), pagemsg,
                    "save page")
    else:
      pagemsg("Would save with comment = %s" % comment)

# Command-line driver.  With --lemma-file, process exactly the pages named in
# that file (one title per line); otherwise walk every page of each French
# category listed below.
parser = blib.create_argparser("Convert head|fr|* to fr-*")
parser.add_argument("--fix-missing-plurals", action="store_true", help="Fix cases with missing plurals by just assuming the default plural.")
parser.add_argument("--lemma-file",help="File containing lemmas to do.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

if args.lemma_file:
  # Read all titles up front and close the file before any page is processed.
  with codecs.open(args.lemma_file, "r", "utf-8") as lemma_fp:
    lines = [x.strip() for x in lemma_fp]
  for i, pagename in blib.iter_items(lines, start, end):
    process_page(i, pywikibot.Page(site, pagename), args.save, args.verbose, args.fix_missing_plurals)
else:
  #for cat in ["French adjective forms", "French participle forms", "French proverbs", "French prefixes", "French suffixes", "French diacritical marks", "French punctuation marks"]:
  for cat in [
      "French nouns", "French proper nouns", "French pronouns",
      "French determiners", "French adjectives", "French verbs",
      "French participles", "French adverbs", "French prepositions",
      "French conjunctions", "French interjections", "French idioms",
      "French phrases", "French abbreviations", "French acronyms",
      "French initialisms", "French noun forms", "French proper noun forms",
      "French pronoun forms", "French determiner forms", "French verb forms",
      "French adjective forms", "French participle forms", "French proverbs",
      "French prefixes", "French suffixes", "French diacritical marks",
      "French punctuation marks"]:
    msg("Processing category: %s" % cat)
    for i, page in blib.cat_articles(cat, start, end):
      process_page(i, page, args.save, args.verbose, args.fix_missing_plurals)
예제 #56
0
      if origt != newt:
        pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(blib.group_notes(notes))
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)

# Command-line driver: parse the standard blib arguments and the start/end
# range of items to process.
parser = blib.create_argparser(u"Convert Japanese headwords from old-style to new-style")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Collect the titles of every page in the "romaji needing attention" category;
# the resulting set is handed to process_page() for each page below.
romaji_to_keep = set()
for i, page in blib.cat_articles("Japanese terms with romaji needing attention"):
  pagetitle = unicode(page.title())
  romaji_to_keep.add(pagetitle)

# Process every page that references one of the Japanese headword templates,
# restricted to the start/end range.
for ref in ["ja-noun", "ja-adj", "ja-verb", "ja-pos"]:
  msg("Processing references to Template:%s" % ref)
  for i, page in blib.references("Template:%s" % ref, start, end):
    process_page(i, page, args.save, args.verbose, romaji_to_keep)