Esempio n. 1
0
def fix_smp(save, verbose, startFrom, upTo):
  """Change pl=smp to pl=sp in Arabic declension templates.

  Visits every page referencing each template in arabic_decl_templates.
  On each page, templates whose name starts with "ar-decl-" have their
  pl=, pl2=, pl3=, ... values inspected; a value of "smp" is replaced with
  "sp" unless the head in 1= ends with TAM, in which case a warning is
  logged and the value is left alone.

  save, verbose: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.references.
  """
  def fix_one_page_smp(page, index, text):
    # Callback for blib.do_edit; returns (text, changelog).
    title = page.title()
    for t in text.filter_templates():
      head = reorder_shadda(getparam(t, "1"))
      if not t.name.startswith("ar-decl-"):
        continue
      # Walk pl=, pl2=, pl3=, ... and stop at the first empty value.
      param = "pl"
      nextnum = 2
      while True:
        plural = getparam(t, param)
        if not plural:
          break
        if plural == "smp":
          if head.endswith(TAM):
            msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing" % (
              index, title, param, head, t.name))
          else:
            msg("Page %s %s: Changing %s=smp to %s=sp in %s" % (
              index, title, param, param, t.name))
            addparam(t, param, "sp")
        param = "pl%s" % nextnum
        nextnum += 1
    changelog = "Change pl=smp to pl=sp"
    msg("Page %s %s: Change log = %s" % (index, title, changelog))
    return text, changelog

  for template in arabic_decl_templates:
    for page, index in blib.references("Template:" + template, startFrom, upTo):
      blib.do_edit(page, index, fix_one_page_smp, save=save, verbose=verbose)
Esempio n. 2
0
def fix_smp(save, verbose, startFrom, upTo):
    """Change pl=smp to pl=sp in Arabic declension templates.

    For every page referencing a template in arabic_decl_templates, each
    template whose name starts with "ar-decl-" has its pl=, pl2=, pl3=, ...
    values checked. A value of "smp" becomes "sp", except when the head in
    1= ends with TAM; then only a warning is logged.

    save, verbose: passed through to blib.do_edit.
    startFrom, upTo: passed through to blib.references.
    """
    def iter_plural_params(t):
        # Yield (name, value) for pl=, pl2=, pl3=, ... until the first
        # empty value. The next value is fetched lazily, i.e. only after
        # the caller has finished handling the current one.
        name = "pl"
        num = 2
        value = getparam(t, name)
        while value:
            yield name, value
            name = "pl%s" % num
            value = getparam(t, name)
            num += 1

    def fix_one_page_smp(page, index, text):
        # Callback for blib.do_edit; returns (text, changelog).
        pagetitle = page.title()
        for t in text.filter_templates():
            head = reorder_shadda(getparam(t, "1"))
            if not t.name.startswith("ar-decl-"):
                continue
            for param, pl in iter_plural_params(t):
                if pl != "smp":
                    continue
                if head.endswith(TAM):
                    msg("Page %s %s: WARNING: Found %s=smp with feminine ending head %s in %s: not changing"
                        % (index, pagetitle, param, head, t.name))
                else:
                    msg("Page %s %s: Changing %s=smp to %s=sp in %s"
                        % (index, pagetitle, param, param, t.name))
                    addparam(t, param, "sp")
        changelog = "Change pl=smp to pl=sp"
        msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
        return text, changelog

    for template in arabic_decl_templates:
        refs = blib.references("Template:" + template, startFrom, upTo)
        for index, page in refs:
            blib.do_edit(page, index, fix_one_page_smp, save=save,
                         verbose=verbose)
Esempio n. 3
0
def do_nouns(poses, headtempls, save, startFrom, upTo):
  """Run remove_i3rab over all params of headword templates in noun-like pages.

  poses: part-of-speech names; each is lowercased and used to build the
    category name "Arabic <pos>s".
  headtempls: template names whose parameters should be processed.
  save: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.cat_articles.

  NOTE(review): `verbose` is not a parameter of this function; it is
  presumably a module-level name -- confirm.
  """
  def do_one_page_noun(page, index, text):
    # Callback for blib.do_edit; returns (text, changelog).
    pagename = page.title()
    seen = 0
    summaries = []
    for template in text.filter_templates():
      if template.name not in headtempls:
        continue
      seen += 1
      entry = getparam(template, "1")
      changed = []
      for param in template.params:
        oldvalue = param.value
        cleaned = remove_i3rab(pagename, index, entry, unicode(oldvalue))
        if cleaned != oldvalue:
          param.value = cleaned
          changed.append(unicode(param.name))
      if changed:
        summaries.append("#%s %s %s (%s)" %
            (seen, template.name, entry, ", ".join(changed)))
    changelog = "Remove i3rab from params in %s" % '; '.join(summaries)
    return text, changelog

  for pos in poses:
    category = "Arabic %ss" % pos.lower()
    for index, page in blib.cat_articles(category, startFrom, upTo):
      blib.do_edit(page, index, do_one_page_noun, save=save, verbose=verbose)
def do_pages(createfn, iterfn=iter_pages):
    """Create pages from the (pagename, text, changelog) triples of iterfn.

    iterfn(createfn) supplies the triples; they are filtered through
    blib.iter_pages using the module-level startFrom/upTo, keyed on the
    page name. With params.offline set, the text and changelog are only
    logged. Otherwise the page (titled with diacritics removed) is created
    through blib.do_edit, unless it already exists.
    """
    candidates = iterfn(createfn)
    for entry, index in blib.iter_pages(candidates,
                                        startFrom,
                                        upTo,
                                        key=lambda x: x[0]):
        pagename, text, changelog = entry
        pagetitle = remove_diacritics(pagename)

        if params.offline:
            msg("Text for %s: [[%s]]" % (pagename, text))
            msg("Changelog = %s" % changelog)
            continue

        page = pywikibot.Page(site, pagetitle)
        if page.exists():
            msg("Page %s %s: WARNING, page already exists, skipping" %
                (index, pagename))
            continue

        # The callback ignores the parsed page and supplies the new text.
        def save_text(page, index, parsed):
            return text, changelog

        blib.do_edit(page, index, save_text,
                     save=params.save, verbose=params.verbose)
Esempio n. 5
0
def rewrite_pages(refrom, reto, refs, cat, pages, pagefile, pagetitle_sub,
    comment, filter_pages, save, verbose, startFrom, upTo):
  """Apply a series of regex substitutions to a set of pages.

  refrom, reto: parallel sequences of regex patterns and replacements,
    applied in order with re.sub.
  refs, cat, pages, pagefile: page sources, tried in the order pages,
    pagefile, refs, cat. `pages` is a sequence of page names; `pagefile`
    is a UTF-8 file with one page name per line; `refs` is passed to
    blib.references; `cat` is passed to blib.cat_articles.
  pagetitle_sub: if set, each occurrence of this string in a pattern is
    replaced with the regex-escaped page title, and in a replacement with
    the raw page title.
  comment: changelog message; if empty, one is built from the pairs.
  filter_pages: if set, only pages whose title matches this regex are
    processed.
  save, verbose: passed through to blib.do_edit.
  startFrom, upTo: passed through to the page iterators.
  """
  def rewrite_one_page(page, index, text):
    # Callback for blib.do_edit; returns (new text, changelog).
    text = unicode(text)
    text = reorder_shadda(text)
    # Materialize the pairs: they are walked here and again when building
    # the default comment, which an iterator-returning zip() would break.
    fromto_pairs = list(zip(refrom, reto))
    for fromval, toval in fromto_pairs:
      if pagetitle_sub:
        pagetitle = unicode(page.title())
        fromval = fromval.replace(pagetitle_sub, re.escape(pagetitle))
        toval = toval.replace(pagetitle_sub, pagetitle)
      text = re.sub(fromval, toval, text)
    return text, comment or "replace %s" % (", ".join("%s -> %s" % (f, t) for f, t in fromto_pairs))

  if pages:
    pages = ((pywikibot.Page(blib.site, page), index) for page, index in blib.iter_pages(pages, startFrom, upTo))
  elif pagefile:
    # Read all names up front inside a context manager so the file handle
    # is closed instead of being left to the garbage collector.
    with codecs.open(pagefile, "r", "utf-8") as fp:
      lines = [x.strip() for x in fp]
    pages = ((pywikibot.Page(blib.site, page), index) for page, index in blib.iter_pages(lines, startFrom, upTo))
  elif refs:
    pages = blib.references(refs, startFrom, upTo, includelinks=True)
  else:
    pages = blib.cat_articles(cat, startFrom, upTo)
  for page, index in pages:
    pagetitle = unicode(page.title())
    if filter_pages and not re.search(filter_pages, pagetitle):
      blib.msg("Skipping %s because doesn't match --filter-pages regex %s" %
          (pagetitle, filter_pages))
    else:
      if verbose:
        blib.msg("Processing %s" % pagetitle)
      blib.do_edit(page, index, rewrite_one_page, save=save, verbose=verbose)
Esempio n. 6
0
def rewrite_ar_nisba(save, verbose, startFrom, upTo):
    """Run rewrite_one_page_ar_nisba on every page referencing Template:ar-nisba.

    save, verbose: passed through to blib.do_edit.
    startFrom, upTo: passed through to blib.references.
    """
    referencing = blib.references("Template:ar-nisba", startFrom, upTo)
    for index, page in referencing:
        blib.do_edit(page, index, rewrite_one_page_ar_nisba,
                     save=save, verbose=verbose)
def delete_form(index, lemma, formind, formval, pos, tag_sets_to_delete,
                preserve_diaeresis, save, verbose, diff):
    """Run delete_form_1 on the page for one inflected form of a lemma.

    The page title is formval with macrons removed (via remove_macrons,
    which also receives preserve_diaeresis). Forms containing "[" and
    forms whose page does not exist are skipped with a log message.

    save, verbose, diff: passed through to blib.do_edit.
    """
    def pagemsg(txt):
        msg("Page %s %s: form %s %s: %s" %
            (index, lemma, formind, formval, txt))

    if "[" in formval:
        pagemsg("Skipping form value %s with link in it" % formval)
        return

    page = pywikibot.Page(site, remove_macrons(formval, preserve_diaeresis))
    if page.exists():
        # Adapt delete_form_1 to the (page, index, parsed) callback shape.
        def do_delete_form_1(page, index, parsed):
            return delete_form_1(page, index, lemma, formind, formval, pos,
                                 tag_sets_to_delete, preserve_diaeresis)

        blib.do_edit(page, index, do_delete_form_1,
                     save=save, verbose=verbose, diff=diff)
    else:
        pagemsg("Skipping form value %s, page doesn't exist" % formval)
def canonicalize_verb_form(save, startFrom, upTo, tempname, formarg):
  """Canonicalize the verb form parameter of `tempname` on referencing pages.

  tempname: name of the template to process.
  formarg: name of the parameter holding the form; it must be convertible
    with int(), since the two following numbered parameters are looked up
    for forms matching "^[1I](-|$)".
  save: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.references.
  """
  def canonicalize_one_page_verb_form(page, index, text):
    # Callback for blib.do_edit; returns (text, changelog).
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    actions_taken = []

    for template in text.filter_templates():
      if template.name != tempname:
        continue
      before = unicode(template)
      form = getparam(template, formarg)
      if form:
        addparam(template, formarg, canonicalize_form(form))
      after = unicode(template)
      if before != after:
        msg("Replacing %s with %s" % (before, after))
      # An action is recorded for every matching template, changed or not,
      # and it reports the form as it was before canonicalization.
      if re.match("^[1I](-|$)", form):
        base = int(formarg)
        action = "form=%s (%s/%s)" % (form,
          getparam(template, str(1 + base)),
          getparam(template, str(2 + base)))
      else:
        action = "form=%s" % form
      actions_taken.append(action)

    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
        tempname, formarg, '; '.join(actions_taken))
    if actions_taken:
      msg("Change log = %s" % changelog)
    return text, changelog

  for page, index in blib.references("Template:%s" % tempname, startFrom, upTo):
    blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save)
Esempio n. 9
0
def do_nouns(poses, headtempls, save, startFrom, upTo):
  """Run remove_i3rab over all params of headword templates in noun-like pages.

  poses: part-of-speech names; each is lowercased and used to build the
    category name "Arabic <pos>s".
  headtempls: template names whose parameters should be processed.
  save: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.cat_articles.

  NOTE(review): `verbose` is not a parameter of this function; it is
  presumably a module-level name -- confirm.
  """
  def do_one_page_noun(page, index, text):
    # Callback for blib.do_edit; returns (text, changelog).
    pagename = page.title()
    seen = 0
    summaries = []
    for template in text.filter_templates():
      if template.name not in headtempls:
        continue
      seen += 1
      entry = getparam(template, "1")
      changed = []
      for param in template.params:
        oldvalue = param.value
        cleaned = remove_i3rab(pagename, index, entry, unicode(oldvalue))
        if cleaned != oldvalue:
          param.value = cleaned
          changed.append(unicode(param.name))
      if changed:
        summaries.append("#%s %s %s (%s)" %
            (seen, template.name, entry, ", ".join(changed)))
    changelog = "Remove i3rab from params in %s" % '; '.join(summaries)
    return text, changelog

  for pos in poses:
    category = "Arabic %ss" % pos.lower()
    for page, index in blib.cat_articles(category, startFrom, upTo):
      blib.do_edit(page, index, do_one_page_noun, save=save, verbose=verbose)
Esempio n. 10
0
def fix_tool_place_noun(save, verbose, startFrom, upTo):
    """Replace cap= with its inverse lc= in three Arabic noun templates.

    For each of "ar-tool noun", "ar-noun of place" and "ar-instance noun",
    every referencing page is edited: uses of the template that have a
    non-empty cap= lose it, and all other uses gain lc=1.

    save, verbose: passed through to blib.do_edit.
    startFrom, upTo: passed through to blib.references.
    """
    templates = ["ar-tool noun", "ar-noun of place", "ar-instance noun"]
    for template in templates:

        # Defined inside the loop because it closes over `template`.
        def fix_one_page_tool_place_noun(page, index, text):
            pagetitle = page.title()
            for t in text.filter_templates():
                if t.name != template:
                    continue
                if getparam(t, "cap"):
                    msg("Page %s %s: Template %s: Remove cap=" %
                        (index, pagetitle, template))
                    t.remove("cap")
                else:
                    msg("Page %s %s: Template %s: Add lc=1" %
                        (index, pagetitle, template))
                    addparam(t, "lc", "1")
            changelog = "%s: If cap= is present, remove it, else add lc=" % template
            msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
            return text, changelog

        refs = blib.references("Template:" + template, startFrom, upTo)
        for index, page in refs:
            blib.do_edit(page, index, fix_one_page_tool_place_noun,
                         save=save, verbose=verbose)
Esempio n. 11
0
def rewrite_verb_headword(save, startFrom, upTo):
    """Run rewrite_one_page_verb_headword on every page in "Arabic verbs".

    save: passed through to blib.do_edit.
    startFrom, upTo: passed through to blib.cat_articles.
    """
    categories = [u"Arabic verbs"]
    for cat in categories:
        articles = blib.cat_articles(cat, startFrom, upTo)
        for index, page in articles:
            blib.do_edit(page, index, rewrite_one_page_verb_headword,
                         save=save)
def process_text_on_non_lemma_page(index, pagetitle, text):
    """Edit the lemma page of each Italian {{superlative of}} found in text.

    For every "superlative of" template whose 1= is "it", the page named
    by 2= is edited through blib.do_edit with a callback that calls
    process_lemma_page(page, index, pagetitle).

    save/verbose/diff settings come from the module-level `args`.
    """
    global args

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    # NOTE(review): `notes` is never read in the visible code.
    notes = []

    for t in blib.parse_text(text).filter_templates():
        tn = tname(t)
        if not (tn == "superlative of" and getparam(t, "1") == "it"):
            continue
        lemma = getparam(t, "2")

        def do_process(page, index, parsed):
            return process_lemma_page(page, index, pagetitle)

        lemmapage = pywikibot.Page(site, lemma)
        blib.do_edit(lemmapage, index, do_process,
                     save=args.save, verbose=args.verbose, diff=args.diff)
Esempio n. 13
0
def canonicalize_verb_form(save, startFrom, upTo, tempname, formarg):
    """Canonicalize the verb form parameter of `tempname` on referencing pages.

    tempname: name of the template to process.
    formarg: name of the parameter holding the form; it must be
      convertible with int(), since the two following numbered parameters
      are looked up for forms matching "^[1I](-|$)".
    save: passed through to blib.do_edit.
    startFrom, upTo: passed through to blib.references.
    """
    def canonicalize_one_page_verb_form(page, index, text):
        # Callback for blib.do_edit; returns (text, changelog).
        pagetitle = page.title()
        msg("Processing page %s" % pagetitle)
        actions_taken = []

        for template in text.filter_templates():
            if template.name != tempname:
                continue
            before = unicode(template)
            form = getparam(template, formarg)
            if form:
                addparam(template, formarg, canonicalize_form(form))
            after = unicode(template)
            if before != after:
                msg("Replacing %s with %s" % (before, after))
            # An action is recorded for every matching template, changed
            # or not, and reports the form as it was before the rewrite.
            if re.match("^[1I](-|$)", form):
                base = int(formarg)
                action = "form=%s (%s/%s)" % (
                    form,
                    getparam(template, str(1 + base)),
                    getparam(template, str(2 + base)))
            else:
                action = "form=%s" % form
            actions_taken.append(action)

        changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
            tempname, formarg, '; '.join(actions_taken))
        if actions_taken:
            msg("Change log = %s" % changelog)
        return text, changelog

    refs = blib.references("Template:%s" % tempname, startFrom, upTo)
    for index, page in refs:
        blib.do_edit(page, index, canonicalize_one_page_verb_form, save=save)
Esempio n. 14
0
def rewrite_ru_decl_adj(save, verbose, startFrom, upTo):
    """Run rewrite_one_page_ru_decl_adj on every page in "Russian adjectives".

    save, verbose: passed through to blib.do_edit.
    startFrom, upTo: passed through to blib.cat_articles.
    """
    categories = [u"Russian adjectives"]
    for cat in categories:
        articles = blib.cat_articles(cat, startFrom, upTo)
        for index, page in articles:
            blib.do_edit(page, index, rewrite_one_page_ru_decl_adj,
                         save=save, verbose=verbose)
Esempio n. 15
0
def process_headwords(save, verbose, startFrom, upTo):
  """Run process_one_page_headwords on all Arabic lemma and non-lemma pages.

  save, verbose: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.cat_articles.
  """
  def process_page(page, index, text):
    # Adapter: the per-page worker takes the page title, not the page.
    title = unicode(page.title())
    return process_one_page_headwords(title, index, text)

  categories = [u"Arabic lemmas", u"Arabic non-lemma forms"]
  for cat in categories:
    articles = blib.cat_articles(cat, startFrom, upTo)
    for index, page in articles:
      blib.do_edit(page, index, process_page, save=save, verbose=verbose)
Esempio n. 16
0
def process_headwords(save, verbose, startFrom, upTo):
  """Run process_one_page_headwords on all Arabic lemma and non-lemma pages.

  save, verbose: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.cat_articles.
  """
  def process_page(page, index, text):
    # Adapter: the per-page worker takes the page title, not the page.
    title = unicode(page.title())
    return process_one_page_headwords(title, index, text)

  categories = [u"Arabic lemmas", u"Arabic non-lemma forms"]
  for cat in categories:
    articles = blib.cat_articles(cat, startFrom, upTo)
    for index, page in articles:
      blib.do_edit(page, index, process_page, save=save, verbose=verbose)
Esempio n. 17
0
def rewrite_arz_headword(save, verbose, startFrom, upTo):
    """Run rewrite_one_page_arz_headword on Egyptian Arabic adjectives and nouns.

    save, verbose: passed through to blib.do_edit.
    startFrom, upTo: passed through to blib.cat_articles.
    """
    categories = [u"Egyptian Arabic adjectives", "Egyptian Arabic nouns"]
    for cat in categories:
        articles = blib.cat_articles(cat, startFrom, upTo)
        for index, page in articles:
            blib.do_edit(page, index, rewrite_one_page_arz_headword,
                         save=save, verbose=verbose)
Esempio n. 18
0
def rewrite_idafa(save, verbose, startFrom, upTo):
    """Run rewrite_one_page_idafa on pages referencing Arabic declension templates.

    The templates are those listed in arabic_decl_templates.

    save, verbose: passed through to blib.do_edit.
    startFrom, upTo: passed through to blib.references.
    """
    for template in arabic_decl_templates:
        refs = blib.references("Template:" + template, startFrom, upTo)
        for index, page in refs:
            blib.do_edit(page, index, rewrite_one_page_idafa,
                         save=save, verbose=verbose)
Esempio n. 19
0
def undo_greek_removal(save, verbose, direcfile, startFrom, upTo):
  """Undo earlier template-parameter removals listed in a directive file.

  direcfile: UTF-8 file whose lines look like
    "* [[PAGE]]: Removed PARAM=...: <nowiki>TEMPLATE</nowiki>".
    Lines that do not match are reported and skipped.
  save, verbose: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.iter_pages.

  For each directive, the recorded template text (minus sc=polytonic, if
  present) is the restore target. The same template with PARAM removed is
  searched for in the page text and replaced by the target.
  """
  template_removals = []
  # A context manager closes the directive file once it has been read,
  # instead of leaving the handle to the garbage collector.
  with codecs.open(direcfile, "r", encoding="utf-8") as fp:
    for line in fp:
      line = line.strip()
      m = re.match(r"\* \[\[(.*?)]]: Removed (.*?)=.*?: <nowiki>(.*?)</nowiki>$",
          line)
      if not m:
        msg("WARNING: Unable to parse line: [%s]" % line)
      else:
        template_removals.append(m.groups())

  for current, index in blib.iter_pages(template_removals, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, removed_param, template_text = current

    # Closes over removed_param/template_text of the current directive; it
    # is only called by the blib.do_edit call later in this iteration.
    def undo_one_page_greek_removal(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      template = blib.parse_text(template_text).filter_templates()[0]
      orig_template = unicode(template)
      if getparam(template, "sc") == "polytonic":
        template.remove("sc")
      # Restore target: the recorded template without sc=polytonic.
      to_template = unicode(template)
      param_value = getparam(template, removed_param)
      template.remove(removed_param)
      # Search text: the template as it looked after the removal.
      from_template = unicode(template)
      text = unicode(text)
      found_orig_template = orig_template in text
      newtext = text.replace(from_template, to_template)
      changelog = ""
      if newtext == text:
        if not found_orig_template:
          pagemsg("WARNING: Unable to locate 'from' template when undoing Greek param removal: %s"
              % from_template)
        else:
          pagemsg("Original template found, taking no action")
      else:
        if found_orig_template:
          pagemsg("WARNING: Undid removal, but original template %s already present!" %
              orig_template)
        # A length delta other than one replacement's worth means that
        # str.replace hit more than one occurrence.
        if len(newtext) - len(text) != len(to_template) - len(from_template):
          pagemsg("WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s" % (
            from_template, to_template))
        changelog = "Undid removal of %s=%s in %s" % (removed_param,
            param_value, to_template)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog

    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, undo_one_page_greek_removal, save=save,
          verbose=verbose)
def undo_ru_auto_accent(save, verbose, direcfile, startFrom, upTo):
  """Undo earlier template replacements listed in a log file.

  direcfile: UTF-8 file whose lines look like
    "Page N PAGE: Replaced {{ORIG}} with {{REPL}}". Lines that do not
    match are reported and skipped.
  save, verbose: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.iter_pages.

  Only replacements whose original template starts with {{ux|, {{usex|,
  {{ru-ux| or {{lang| are undone, and only on pages where the replacement
  template follows a "#*" marker (optionally with colons and spaces) at
  the start of a line.
  """
  template_removals = []
  # A context manager closes the log file once it has been read, instead
  # of leaving the handle to the garbage collector.
  with codecs.open(direcfile, "r", encoding="utf-8") as fp:
    for line in fp:
      line = line.strip()
      m = re.search(r"^Page [0-9]+ (.*?): Replaced (\{\{.*?\}\}) with (\{\{.*?\}\})$",
          line)
      if not m:
        msg("WARNING: Unable to parse line: [%s]" % line)
      else:
        template_removals.append(m.groups())

  for current, index in blib.iter_pages(template_removals, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, orig_template, repl_template = current
    if not re.search(r"^\{\{(ux|usex|ru-ux|lang)\|", orig_template):
      continue

    # Closes over orig_template/repl_template of the current entry; it is
    # only called by the blib.do_edit call later in this iteration.
    def undo_one_page_ru_auto_accent(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      text = unicode(text)
      # Raw string: "\*" is a regex escape, not a string escape.
      if not re.search(r"^#\*:* *%s" % re.escape(repl_template), text, re.M):
        return None, ""
      found_orig_template = orig_template in text
      newtext = text.replace(repl_template, orig_template)
      changelog = ""
      if newtext == text:
        if not found_orig_template:
          pagemsg("WARNING: Unable to locate 'repl' template when undoing Russian auto-accenting: %s"
              % repl_template)
        else:
          pagemsg("Original template found, taking no action")
      else:
        pagemsg("Replaced %s with %s" % (repl_template, orig_template))
        if found_orig_template:
          pagemsg("WARNING: Undid replacement, but original template %s already present!" %
              orig_template)
        # A length delta other than one replacement's worth means that
        # str.replace hit more than one occurrence.
        if len(newtext) - len(text) != len(orig_template) - len(repl_template):
          pagemsg("WARNING: Length mismatch when undoing Russian auto-accenting, may have matched multiple templates: orig=%s, repl=%s" % (
            orig_template, repl_template))
        changelog = "Undid auto-accenting (per Wikitiki89) of %s" % (orig_template)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog

    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, undo_one_page_ru_auto_accent, save=save,
          verbose=verbose)
Esempio n. 21
0
def process_lemma(index, pagetitle, slots, program_args):
  """Run process_page on the form pages of a Latin lemma for chosen slots.

  The lemma page is parsed; for each la-conj / la-ndecl / la-adecl
  template its inflected forms are generated with
  lalib.generate_infl_forms. Every slot that equals one of `slots`, or
  matches one according to lalib.slot_matches_spec, has its
  comma-separated forms visited: the page for each form (macrons removed)
  is edited through blib.do_edit, except forms containing "[" or "|",
  forms whose page does not exist, and the lemma page itself.

  program_args supplies the verbose, save and diff settings.
  """
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, program_args.verbose)

  # Inflection template name -> part of speech for generate_infl_forms.
  template_to_pos = {"la-conj": "verb", "la-ndecl": "noun", "la-adecl": "adj"}

  page = pywikibot.Page(site, pagetitle)
  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    pos = template_to_pos.get(tname(t))
    if not pos:
      continue
    args = lalib.generate_infl_forms(pos, unicode(t), errandpagemsg, expand_text)
    for slot in args:
      wanted = any(
        spec == slot or lalib.slot_matches_spec(slot, spec) for spec in slots)
      if not wanted:
        continue
      for formpagename in re.split(",", args[slot]):
        if "[" in formpagename or "|" in formpagename:
          pagemsg("WARNING: Skipping page %s with links in it" % formpagename)
          continue
        formpagename = lalib.remove_macrons(formpagename)
        formpage = pywikibot.Page(site, formpagename)
        if not formpage.exists():
          pagemsg("WARNING: Form page %s doesn't exist, skipping" % formpagename)
          continue
        if formpagename == pagetitle:
          pagemsg("WARNING: Skipping dictionary form")
          continue

        def do_process_page(page, index, parsed):
          return process_page(index, page, program_args)

        blib.do_edit(formpage, index, do_process_page,
            save=program_args.save, verbose=program_args.verbose,
            diff=program_args.diff)
def do_pages(createfn, iterfn=iter_pages):
    """Create pages from the (pagename, text, changelog) triples of iterfn.

    iterfn(createfn) supplies the triples; they are filtered through
    blib.iter_pages using the module-level startFrom/upTo, keyed on the
    page name. With params.offline set, the text and changelog are only
    logged. Otherwise the page (titled with diacritics removed) is created
    through blib.do_edit, unless it already exists.
    """
    candidates = iterfn(createfn)
    for entry, index in blib.iter_pages(candidates, startFrom, upTo, key=lambda x: x[0]):
        pagename, text, changelog = entry
        pagetitle = remove_diacritics(pagename)

        if params.offline:
            msg("Text for %s: [[%s]]" % (pagename, text))
            msg("Changelog = %s" % changelog)
            continue

        page = pywikibot.Page(site, pagetitle)
        if page.exists():
            msg("Page %s %s: WARNING, page already exists, skipping" % (index, pagename))
            continue

        # The callback ignores the parsed page and supplies the new text.
        def save_text(page, index, parsed):
            return text, changelog

        blib.do_edit(page, index, save_text,
                     save=params.save, verbose=params.verbose)
Esempio n. 23
0
def rewrite_template_names(old, new, removelist, save, verbose,
    startFrom, upTo):
  """Rename template `old` to `new` on all pages referencing it.

  old, new: template names (without the "Template:" prefix).
  removelist: parameter names to remove.
  save, verbose: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.references.
  """
  def rewrite_one_page_template_names(page, index, text):
    # Callback for blib.do_edit; returns (text, changelog).
    actions = []
    for template in text.filter_templates():
      if template.name == old:
        actions.append("rename {{temp|%s}} to {{temp|%s}}" % (old, new))
        template.name = new
      # NOTE(review): parameters in removelist are removed from every
      # template on the page, not only the renamed ones -- confirm that
      # this is intended.
      for remove in removelist:
        if not template.has(remove):
          continue
        template.remove(remove)
        actions.append("remove %s=" % remove)

    changelog = '; '.join(actions)
    return text, changelog

  refs = blib.references("Template:%s" % old, startFrom, upTo)
  for index, page in refs:
    blib.do_edit(page, index, rewrite_one_page_template_names, save=save,
        verbose=verbose)
Esempio n. 24
0
def restore_removed_pagehead(index, pagetitle, comment, oldrevid):
    """Put back page-head text that an earlier edit dropped.

    Only acts when `comment` matches '(add|replace).*Etymology section'.
    The page head is the text before the first "==...==" header line. If
    revision `oldrevid` had a page head and the current page has none, the
    old head is prepended to the current text. If both have different
    non-empty heads, a warning is emitted and nothing is changed.

    save/verbose/diff settings come from the module-level `args`.
    """
    global args

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errpagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))
        errmsg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing page with comment = %s" % comment)
    if not re.search('(add|replace).*Etymology section', comment):
        return

    page = pywikibot.Page(site, pagetitle)
    oldtext = page.getOldVersion(oldrevid)
    oldtext_pagehead = re.split("(^==[^=\n]+==\n)", oldtext, 0, re.M)[0]
    if not oldtext_pagehead:
        return

    newtext_pagehead = re.split("(^==[^=\n]+==\n)", page.text, 0, re.M)[0]
    if newtext_pagehead == oldtext_pagehead:
        return
    if newtext_pagehead:
        errpagemsg(
            "WARNING: Something weird, old page has pagehead <%s> and new page has different pagehead <%s>"
            % (oldtext_pagehead, newtext_pagehead))
        return

    pagemsg("Adding old pagehead <%s> to new page" % oldtext_pagehead)
    newtext = oldtext_pagehead + page.text

    # The callback ignores the parsed page and supplies the rebuilt text.
    def do_process_page(pg, ind, parsed):
        note = "Restore missing page head: %s" % oldtext_pagehead.strip()
        return newtext, [note]

    blib.do_edit(page, index, do_process_page,
                 save=args.save, verbose=args.verbose, diff=args.diff)
def process_non_lemma_page(page, index):
  """Edit the positive-degree page of each Latin comparative/superlative.

  For every la-adj-comp / la-adj-sup template on `page` that has pos=, the
  page named by pos= (macrons removed) is edited through blib.do_edit. The
  callback calls process_lemma_page with a flag telling whether the
  template was la-adj-comp, and with the lemma (1=, or the page title if
  1= is empty). Templates without pos= only produce a warning.

  save/verbose/diff settings come from the module-level `args`.
  """
  global args
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))
  pagemsg("Processing")
  text = unicode(page.text)
  parsed = blib.parse_text(text)
  for t in parsed.filter_templates():
    tn = tname(t)
    if tn not in ["la-adj-comp", "la-adj-sup"]:
      continue
    lemma = getparam(t, "1") or pagetitle
    pos = getparam(t, "pos")
    if not pos:
      pagemsg("WARNING: Didn't see positive degree: %s" % unicode(t))
      continue

    def do_process(page, index, parsed):
      return process_lemma_page(page, index, tn == "la-adj-comp",
          lemma)

    pospage = pywikibot.Page(site, lalib.remove_macrons(pos))
    blib.do_edit(pospage, index, do_process,
        save=args.save, verbose=args.verbose, diff=args.diff)
def process_page(index, page, save, verbose, diff):
    """Run correct_nom_sg_n_participle on the supine pages of a Latin verb.

    For each la-conj template on `page`, verb forms are generated with
    lalib.generate_verb_forms. Each comma-separated form in the "sup_acc"
    slot gets a "Line to delete" log message, and the page named by that
    form (macrons removed) is edited through blib.do_edit.

    save, verbose, diff: passed through to blib.do_edit.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    parsed = blib.parse(page)

    for t in parsed.filter_templates():
        if tname(t) != "la-conj":
            continue
        args = lalib.generate_verb_forms(unicode(t), errandpagemsg,
                                         expand_text)
        supine_spec = args.get("sup_acc", "")
        if not supine_spec:
            continue
        for supform in supine_spec.split(","):
            # The participle is the supine with final -um replaced by -us.
            non_impers_part = re.sub("um$", "us", supform)
            pagemsg(
                "Line to delete: part %s allbutnomsgn {{la-adecl|%s}}"
                % (non_impers_part, non_impers_part))

            def do_correct_nom_sg_n_participle(page, index, parsed):
                return correct_nom_sg_n_participle(
                    page, index, supform, args["1s_pres_actv_indc"])

            suppage = pywikibot.Page(site, lalib.remove_macrons(supform))
            blib.do_edit(suppage, index, do_correct_nom_sg_n_participle,
                         save=save, verbose=verbose, diff=diff)
def process_page(index, pos, lemma, subs, infl, save, verbose):
  """Run process_form on the page of every inflected form of a lemma.

  Forms are generated with lalib.generate_infl_forms(pos, infl, ...);
  nothing is done if that returns None. Each slot value is split on
  commas, and the page named by each resulting form (macrons removed) is
  edited through blib.do_edit.

  save, verbose: passed through to blib.do_edit.
  """
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, lemma, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, lemma, txt))
  def expand_text(tempcall):
    return blib.expand_text(tempcall, remove_macrons(lemma), pagemsg, verbose)

  pagemsg("Processing")

  args = lalib.generate_infl_forms(pos, infl, errandpagemsg, expand_text)
  if args is None:
    return

  # Flatten the comma-separated form lists of all slots into one list.
  forms_to_delete = []
  for _slot, formspec in args.iteritems():
    forms_to_delete += formspec.split(",")

  for formind, form in blib.iter_items(forms_to_delete):
    def handler(page, formind, parsed):
      return process_form(index, page, lemma, formind, form, subs)

    formpage = pywikibot.Page(site, remove_macrons(form))
    blib.do_edit(formpage, formind, handler, save=save, verbose=verbose)
def delete_form(index, lemma, formind, formval, lang, save, verbose, diff):
    """Run delete_form_1 on the page for one inflected form of a lemma.

    The page title is formval itself. Forms containing "[" and forms
    whose page does not exist are skipped with a log message.

    save, verbose, diff: passed through to blib.do_edit.
    """
    def pagemsg(txt):
        msg("Page %s %s: form %s %s: %s" %
            (index, lemma, formind, formval, txt))

    if "[" in formval:
        pagemsg("Skipping form value %s with link in it" % formval)
        return

    page = pywikibot.Page(site, formval)
    if page.exists():
        # Adapt delete_form_1 to the (page, index, parsed) callback shape.
        def do_delete_form_1(page, index, parsed):
            return delete_form_1(page, index, lemma, formind, formval, lang)

        blib.do_edit(page, index, do_delete_form_1,
                     save=save, verbose=verbose, diff=diff)
    else:
        pagemsg("Skipping form value %s, page doesn't exist" % formval)
def fix_tool_place_noun(save, verbose, startFrom, upTo):
  """Replace cap= with its inverse lc= in three Arabic noun templates.

  For each of "ar-tool noun", "ar-noun of place" and "ar-instance noun",
  every referencing page is edited: uses of the template that have a
  non-empty cap= lose it, and all other uses gain lc=1.

  save, verbose: passed through to blib.do_edit.
  startFrom, upTo: passed through to blib.references.
  """
  templates = ["ar-tool noun", "ar-noun of place", "ar-instance noun"]
  for template in templates:

    # Defined inside the loop because it closes over `template`.
    def fix_one_page_tool_place_noun(page, index, text):
      pagetitle = page.title()
      for t in text.filter_templates():
        if t.name != template:
          continue
        if getparam(t, "cap"):
          msg("Page %s %s: Template %s: Remove cap=" %
              (index, pagetitle, template))
          t.remove("cap")
        else:
          msg("Page %s %s: Template %s: Add lc=1" %
              (index, pagetitle, template))
          addparam(t, "lc", "1")
      changelog = "%s: If cap= is present, remove it, else add lc=" % template
      msg("Page %s %s: Change log = %s" % (index, pagetitle, changelog))
      return text, changelog

    refs = blib.references("Template:" + template, startFrom, upTo)
    for index, page in refs:
      blib.do_edit(page, index, fix_one_page_tool_place_noun, save=save,
          verbose=verbose)
Esempio n. 30
0
def do_verbs(save, startFrom, upTo):
  """Pass the vn= values of {{ar-conj}} templates through remove_i3rab.

  Iterates over blib.cat_articles("Arabic verbs", startFrom, upTo) and edits
  each page through blib.do_edit. `save` is forwarded to blib.do_edit.
  """
  def do_one_page_verb(page, index, text):
    """Rewrite vn= in every {{ar-conj}} of `text`; return (text, changelog)."""
    pagename = page.title()
    verbcount = 0
    verbids = []
    for template in text.filter_templates():
      if template.name == "ar-conj":
        verbcount += 1
        # Keep the untouched parameter value: the change check below must
        # compare against what is actually on the page, including a trailing
        # "?" marker. Comparing against the stripped value reported a
        # replacement for every uncertain verbal noun, changed or not.
        origvalue = getparam(template, "vn")
        vnvalue = origvalue
        uncertain = False
        if vnvalue.endswith("?"):
          vnvalue = vnvalue[:-1]
          msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
            index, pagename))
          uncertain = True
        if not vnvalue:
          continue
        # Verbal nouns may be separated by a Latin or an Arabic comma.
        vns = re.split(u"[,،]", vnvalue)
        form = getparam(template, "1")
        verbid = "#%s form %s" % (verbcount, form)
        # For form I (written "1" or "I", optionally followed by "-..."),
        # parameters 2 and 3 are added to the identifier.
        if re.match("^[1I](-|$)", form):
          verbid += " (%s,%s)" % (getparam(template, "2"), getparam(template, "3"))
        no_i3rab_vns = [remove_i3rab(pagename, index, verbid, vn) for vn in vns]
        newvn = ",".join(no_i3rab_vns)
        if uncertain:
          # Restore the uncertainty marker that was stripped above.
          newvn += "?"
        if newvn != origvalue:
          msg("Page %s %s: Verb %s, replacing %s with %s" % (
            index, pagename, verbid, origvalue, newvn))
          addparam(template, "vn", newvn)
          verbids.append(verbid)
    return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
          ', '.join(verbids))
  # NOTE(review): `verbose` is not a parameter of this function; it has to be
  # a module-level name defined elsewhere in the file.
  for page, index in blib.cat_articles("Arabic verbs", startFrom, upTo):
    blib.do_edit(page, index, do_one_page_verb, save=save, verbose=verbose)
Esempio n. 31
0
def do_verbs(save, startFrom, upTo):
  """Pass the vn= values of {{ar-conj}} templates through remove_i3rab.

  Iterates over blib.cat_articles("Arabic verbs", startFrom, upTo) and edits
  each page through blib.do_edit. `save` is forwarded to blib.do_edit.
  """
  def do_one_page_verb(page, index, text):
    """Rewrite vn= in every {{ar-conj}} of `text`; return (text, changelog)."""
    pagename = page.title()
    verbcount = 0
    verbids = []
    for template in text.filter_templates():
      if template.name == "ar-conj":
        verbcount += 1
        # Keep the untouched parameter value: the change check below must
        # compare against what is actually on the page, including a trailing
        # "?" marker. Comparing against the stripped value reported a
        # replacement for every uncertain verbal noun, changed or not.
        origvalue = getparam(template, "vn")
        vnvalue = origvalue
        uncertain = False
        if vnvalue.endswith("?"):
          vnvalue = vnvalue[:-1]
          msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
            index, pagename))
          uncertain = True
        if not vnvalue:
          continue
        # Verbal nouns may be separated by a Latin or an Arabic comma.
        vns = re.split(u"[,،]", vnvalue)
        form = getparam(template, "1")
        verbid = "#%s form %s" % (verbcount, form)
        # For form I (written "1" or "I", optionally followed by "-..."),
        # parameters 2 and 3 are added to the identifier.
        if re.match("^[1I](-|$)", form):
          verbid += " (%s,%s)" % (getparam(template, "2"), getparam(template, "3"))
        no_i3rab_vns = [remove_i3rab(pagename, index, verbid, vn) for vn in vns]
        newvn = ",".join(no_i3rab_vns)
        if uncertain:
          # Restore the uncertainty marker that was stripped above.
          newvn += "?"
        if newvn != origvalue:
          msg("Page %s %s: Verb %s, replacing %s with %s" % (
            index, pagename, verbid, origvalue, newvn))
          addparam(template, "vn", newvn)
          verbids.append(verbid)
    return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
          ', '.join(verbids))
  # NOTE(review): `verbose` is not a parameter of this function; it has to be
  # a module-level name defined elsewhere in the file.
  for index, page in blib.cat_articles("Arabic verbs", startFrom, upTo):
    blib.do_edit(page, index, do_one_page_verb, save=save, verbose=verbose)
Esempio n. 32
0
def search_category_for_missing_form(form, pos, templates, save, startFrom,
    upTo):
  """Convert raw headwords in [[Category:Arabic <form>s]] to a headword template.

  form: used to build the category name ("Arabic %ss") and interpolated into
    the {{head|ar|...}} regex.
  pos: interpolated into the section-header part of both regexes.
  templates: a template name or a list of names; the first one is used as the
    replacement template, and all of them are looked for in the page text.
  save, startFrom, upTo: forwarded to blib.do_edit / blib.cat_articles.
  """
  if not isinstance(templates, list):
    templates = [templates]
  cat = "Arabic %ss" % form
  repltemplate = templates[0]
  msg("---Searching [[Category:%s|%s]] for %s:---" %
      (cat, cat, ' or '.join(["{{temp|%s}}" % temp for temp in templates])))

  def parse_infls(infltext, tr):
    """Turn comma-separated inflections following a headword into template args.

    Each piece is expected to be a {{lang|ar|...}} or {{l|ar|...}} call,
    optionally followed by a parenthesized string (stored as the
    transliteration) and then {{g|...}}. Returns a string of "|name=value"
    arguments, beginning with "|tr=<tr>" when `tr` is truthy.
    """
    # Heads and their transliterations, kept as parallel lists per gender.
    fs = []
    ftrs = []
    pls = []
    pltrs = []
    fpls = []
    fpltrs = []
    for rawinfl in re.split(", *", infltext):
      if not rawinfl:
        continue
      # group 1 = head, group 2 = parenthesized text (may be None),
      # group 3 = contents of {{g|...}}
      infl = re.match("'*\{\{(?:lang|l)\|ar\|(.*?)\}\}'* *(?:(?:\{\{IPAchar\|)?\((.*?)\)(?:\}\})?)? *\{\{g\|(.*?)\}\}",
        rawinfl)
      if not infl:
        msg("WARNING: Unable to match infl-outside-head %s" % rawinfl)
        continue
      msg("Found infl outside head: %s" % infl.group(0))
      if "|" in infl.group(1):
        msg("WARNING: Found | in head, skipping: %s" % infl.group(1))
        continue
      if infl.group(3) == "f":
        fs.append(infl.group(1))
        ftrs.append(infl.group(2))
      elif infl.group(3) == "p":
        pls.append(infl.group(1))
        pltrs.append(infl.group(2))
      elif infl.group(3) == "f-p":
        fpls.append(infl.group(1))
        fpltrs.append(infl.group(2))
      else:
        msg("WARNING: Unrecognized inflection gender '%s'" % infl.group(3))
    infls = ""
    if tr:
      infls += "|tr=%s" % tr
    def handle_infls(infls, arabic, latin, argname):
      """Append |argname=, |argname2=, ... (and matching ...tr=) to `infls`."""
      count = 1
      for ar in arabic:
        # The first value uses the bare argument name; later ones get a
        # numeric suffix starting at 2.
        if count == 1:
          arg = argname
        else:
          arg = "%s%s" % (argname, count)
        infls += "|%s=%s" % (arg, ar)
        # latin[] holds None where the regex found no parenthesized text.
        if latin[count - 1] != None:
          if count == 1:
            larg = "%str" % argname
          else:
            larg = "%s%str" % (argname, count)
          infls += "|%s=%s" % (larg, latin[count - 1])
        count += 1
      return infls
    infls = handle_infls(infls, fs, ftrs, "f")
    infls = handle_infls(infls, pls, pltrs, "pl")
    infls = handle_infls(infls, fpls, fpltrs, "fpl")
    return infls

  def remove_empty_args(templ):
    """Drop pipes directly before "}" and collapse repeated pipes before name=."""
    templ = re.sub(r"\|+\}", "}", templ)
    templ = re.sub(r"\|\|+([A-Za-z0-9_]+=)", r"|\1", templ)
    return templ

  def correct_one_page_headword_formatting(page, index, text):
    """Rewrite matching headword lines in `text`; return (text, changelog)."""
    text = unicode(text)
    pagetitle = page.title()
    # Report pages that do not contain any of the expected templates.
    sawtemp = False
    for temp in templates:
      if "{{%s" % temp in text:
        sawtemp = True
    if not sawtemp:
      if "{{head|ar|" in text:
        msg("* %s not in {{l|ar|%s}} but {{temp|head|ar}} is" % (' or '.join(templates), pagetitle))
      else:
        msg("* %s not in {{l|ar|%s}}, nor {{temp|head|ar}}" % (' or '.join(templates), pagetitle))
    replsfound = 0
    # Pass 1: a {{head|ar|<form>...}} headword under a <pos> section header.
    # group 1 = header, 2 = named args inside {{head}}, 3 = parenthesized
    # text after the template, 4 = comma-separated inflections, 5 = rest of
    # the line.
    for m in re.finditer(r'(===+%s===+\s*)\{\{head\|ar\|(?:sc=Arab\|)?%s((?:\|[A-Za-z0-9_]+=(?:\[[^\]]*\]|[^|}])*)*)\}\} *(?:(?:\{\{IPAchar\|)?\((.*?)\)(?:\}\})?)? *((?:,[^,\n]*)*)(.*)' % (pos, form), text, re.I):
      replsfound += 1
      msg("Found match: %s" % m.group(0))
      if m.group(5):
        msg("WARNING: Trailing text %s" % m.group(5))
      head = ""
      g = ""
      tr = None
      for infl in re.finditer(r"\|([A-Za-z0-9_]+)=((?:\[[^\]]*\]|[^|}])*)", m.group(2)):
        msg("Found infl within head: %s" % infl.group(0))
        if infl.group(1) == "head":
          head = infl.group(2).replace("'", "")
        elif infl.group(1) == "g":
          g = infl.group(2).replace("'", "")
        elif infl.group(1) == "tr":
          tr = infl.group(2)
        elif infl.group(1) == "sc":
          pass
        else:
          msg("WARNING: Unrecognized argument '%s'" % infl.group(1))
      # Parenthesized text after the template overrides a tr= argument.
      if m.group(3):
        tr = m.group(3)
      infls = parse_infls(m.group(4), tr)
      repl = "{{%s|%s|%s%s}}" % (repltemplate, head, g, infls)
      repl = remove_empty_args(repl)
      repl = m.group(1) + repl + m.group(5) # Include leading, trailing text
      msg("Replacing\n%s\nwith\n%s" % (m.group(0), repl))
      # Only the first occurrence of the matched text is replaced.
      newtext = text.replace(m.group(0), repl, 1)
      if newtext == text:
        msg("WARNING: Unable to do replacement")
      else:
        text = newtext
    # Pass 2: a headword written as {{lang|ar|...}}/{{l|ar|...}} or as quoted
    # plain text. group 1 = header, 2 = templated head, 3 = quoted head,
    # 4 = parenthesized text, 5 = contents of {{g|...}}, 6 = comma-separated
    # inflections, 7 = rest of the line.
    for m in re.finditer(r"(===+%s===+\s*)(?:'*\{\{(?:lang|l)\|ar\|(.*?)\}\}'*|'+([^{}']+)'+) *(?:(?:\{\{IPAchar\|)?\((.*?)\)(?:\}\})?)? *(?:\{\{g\|(.*?)\}\})? *((?:,[^,\n]*)*)(.*)" % pos, text, re.I):
      replsfound += 1
      msg("Found match: %s" % m.group(0))
      if m.group(7):
        msg("WARNING: Trailing text %s" % m.group(7))
      head = m.group(2) or m.group(3)
      g = m.group(5) or ""
      tr = m.group(4)
      infls = parse_infls(m.group(6), tr)
      repl = "{{%s|%s|%s%s}}" % (repltemplate, head, g, infls)
      repl = remove_empty_args(repl)
      repl = m.group(1) + repl + m.group(7) # Include leading, trailing text
      msg("Replacing\n%s\nwith\n%s" % (m.group(0), repl))
      newtext = text.replace(m.group(0), repl, 1)
      if newtext == text:
        msg("WARNING: Unable to do replacement")
      else:
        text = newtext
      # NOTE(review): the category removal below runs once per pass-2 match
      # and never for pass-1 matches -- confirm this nesting is intended.
      # If there's a blank line before and after the category, leave a single
      # blank line
      newtext, nsubs = \
        re.subn(r"\n\n\[\[Category:%s\]\]\n\n" % cat, "\n\n", text, 1)
      if nsubs == 0:
        newtext = re.sub(r"\[\[Category:%s\]\]\n?" % cat, "", text, 1)
      if newtext != text:
        msg("Removed [[Category:%s]]" % cat)
        text = newtext
      else:
        msg("WARNING: Unable to remove [[Category:%s]]" % cat)
    if not sawtemp and replsfound == 0:
      msg("WARNING: No replacements found for {{l|ar|%s}}" % pagetitle)
    return text, "Correct headword formatting for [[:Category:%s]]" % cat

  for index, page in blib.cat_articles(cat, startFrom, upTo):
    blib.do_edit(page, index, correct_one_page_headword_formatting, save=save)
Esempio n. 33
0
def rewrite_verb_headword(save, startFrom, upTo):
  """Run rewrite_one_page_verb_headword over the "Arabic verbs" category.

  Each (page, index) pair yielded by blib.cat_articles is passed to
  blib.do_edit with `save` forwarded.
  """
  categories = [u"Arabic verbs"]
  for category in categories:
    articles = blib.cat_articles(category, startFrom, upTo)
    for page, index in articles:
      blib.do_edit(page, index, rewrite_one_page_verb_headword, save=save)
def clean_verb_headword(save, startFrom, upTo):
  """Run clean_one_page_verb_headword over the "Arabic verbs" category.

  Each (index, page) pair yielded by blib.cat_articles is passed to
  blib.do_edit with `save` forwarded.
  """
  categories = [u"Arabic verbs"]
  for category in categories:
    articles = blib.cat_articles(category, startFrom, upTo)
    for index, page in articles:
      blib.do_edit(page, index, clean_one_page_verb_headword, save=save)
Esempio n. 35
0
            t.add("1", "hu")
            # Put remaining parameters in order.
            for name, value, showkey in params:
                if re.search("^[0-9]+$", name):
                    t.add(str(int(name) + 1),
                          value,
                          showkey=showkey,
                          preserve_spacing=False)
                else:
                    t.add(name, value, showkey=showkey, preserve_spacing=False)
            blib.set_template_name(t, "affix")
            notes.append("convert {{hu-suffix}} to {{affix}}")
        if unicode(t) != origt:
            pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))

    return unicode(parsed), notes


# Script entry point: parse the command line, then run process_page on every
# item yielded by blib.references() for Template:hu-suffix.
parser = blib.create_argparser("Clean up {{hu-suffix}}")
args = parser.parse_args()
# args.start / args.end are converted into the (start, end) pair that
# blib.references() takes.
start, end = blib.parse_start_end(args.start, args.end)

for template in ["hu-suffix"]:
    msg("Processing references to Template:%s" % template)
    for i, page in blib.references("Template:%s" % template, start, end):
        blib.do_edit(page,
                     i,
                     process_page,
                     save=args.save,
                     verbose=args.verbose)
Esempio n. 36
0
def correct_link_formatting(save, startFrom, upTo):
  """Run correct_one_page_link_formatting over the Arabic lemma and non-lemma categories.

  Each (index, page) pair yielded by blib.cat_articles is passed to
  blib.do_edit with `save` forwarded.
  """
  categories = [u"Arabic lemmas", u"Arabic non-lemma forms"]
  for category in categories:
    articles = blib.cat_articles(category, startFrom, upTo)
    for index, page in articles:
      blib.do_edit(page, index, correct_one_page_link_formatting, save=save)
Esempio n. 37
0
def rewrite_ar_plural(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_ar_plural over the "Arabic plurals" category.

  Each (page, index) pair yielded by blib.cat_articles is passed to
  blib.do_edit with `save` and `verbose` forwarded.
  """
  categories = [u"Arabic plurals"]
  for category in categories:
    articles = blib.cat_articles(category, startFrom, upTo)
    for page, index in articles:
      blib.do_edit(page, index, rewrite_one_page_ar_plural,
          save=save, verbose=verbose)
Esempio n. 38
0
      return "test_infer"
  for pagetext in test_templates:
    text = blib.parse_text(pagetext)
    page = Page()
    newtext, comment = infer_one_page_decls(page, 1, text)
    msg("newtext = %s" % unicode(newtext))
    msg("comment = %s" % comment)

# Command-line setup.
# NOTE(review): the description string mentions pronunciation sections, while
# this script runs infer_one_page_decls -- it looks copied from another
# script; confirm.
parser = blib.create_argparser("Add pronunciation sections to Russian Wiktionary entries")
# --mockup switches the script to test_infer() instead of editing pages.
parser.add_argument('--mockup', action="store_true", help="Use mocked-up test code")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)
mockup = args.mockup

def ignore_page(page):
  """Return True when `page` is in one of the skipped namespaces.

  `page` is either a title string or an object with a title() method. The
  title is skipped when it starts with "Appendix:", "Appendix talk:",
  "User:", "User talk:" or "Talk:".
  """
  title = page if isinstance(page, basestring) else unicode(page.title())
  return bool(
      re.search(r"^(Appendix|Appendix talk|User|User talk|Talk):", title))

# Either run the mocked-up test, or run infer_one_page_decls on the items
# yielded by blib.references() for every template in decl_templates.
if mockup:
  test_infer()
else:
  for tempname in decl_templates:
    for index, page in blib.references("Template:" + tempname, start, end):
      # Pages for which ignore_page() returns True are only logged.
      if ignore_page(page):
        msg("Page %s %s: Skipping due to namespace" % (index, unicode(page.title())))
      else:
        blib.do_edit(page, index, infer_one_page_decls, save=args.save)
Esempio n. 39
0
def rewrite_ar_nisba(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_ar_nisba over references to Template:ar-nisba.

  Each (index, page) pair yielded by blib.references is passed to
  blib.do_edit with `save` and `verbose` forwarded.
  """
  referencing = blib.references("Template:ar-nisba", startFrom, upTo)
  for index, page in referencing:
    blib.do_edit(page, index, rewrite_one_page_ar_nisba,
        save=save, verbose=verbose)
Esempio n. 40
0
def undo_greek_removal(save, verbose, direcfile, startFrom, upTo):
    """Put back template parameters listed as removed in `direcfile`.

    Each line of `direcfile` (UTF-8) is expected to look like
    "* [[PAGE]]: Removed PARAM=...: <nowiki>TEMPLATE</nowiki>"; lines that do
    not match are reported and skipped. For each parsed entry, the template
    text without PARAM is looked up on PAGE and replaced by the template text
    with PARAM (minus sc=polytonic, if present).

    save, verbose: forwarded to blib.do_edit.
    startFrom, upTo: forwarded to blib.iter_pages.
    """
    template_removals = []
    # Open the directive file in a with-block so that it is closed once it
    # has been read, instead of being left to the garbage collector.
    with codecs.open(direcfile, "r", encoding="utf-8") as directives:
        for line in directives:
            line = line.strip()
            m = re.match(
                r"\* \[\[(.*?)]]: Removed (.*?)=.*?: <nowiki>(.*?)</nowiki>$",
                line)
            if not m:
                msg("WARNING: Unable to parse line: [%s]" % line)
            else:
                template_removals.append(m.groups())

    for current, index in blib.iter_pages(
            template_removals,
            startFrom,
            upTo,
            # key is the page name
            key=lambda x: x[0]):
        pagename, removed_param, template_text = current

        # Closes over removed_param/template_text of this iteration; it is
        # only called by the do_edit() at the bottom of the same iteration.
        def undo_one_page_greek_removal(page, index, text):
            def pagemsg(txt):
                msg("Page %s %s: %s" % (index, unicode(page.title()), txt))

            template = blib.parse_text(template_text).filter_templates()[0]
            orig_template = unicode(template)
            if getparam(template, "sc") == "polytonic":
                template.remove("sc")
            # "to" = template with the parameter; "from" = template with the
            # parameter removed, i.e. what should currently be on the page.
            to_template = unicode(template)
            param_value = getparam(template, removed_param)
            template.remove(removed_param)
            from_template = unicode(template)
            text = unicode(text)
            found_orig_template = orig_template in text
            newtext = text.replace(from_template, to_template)
            changelog = ""
            if newtext == text:
                if not found_orig_template:
                    pagemsg(
                        "WARNING: Unable to locate 'from' template when undoing Greek param removal: %s"
                        % from_template)
                else:
                    pagemsg("Original template found, taking no action")
            else:
                if found_orig_template:
                    pagemsg(
                        "WARNING: Undid removal, but original template %s already present!"
                        % orig_template)
                # A single replacement changes the length by exactly the
                # difference between the two template strings.
                if len(newtext) - len(text) != len(to_template) - len(
                        from_template):
                    pagemsg(
                        "WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s"
                        % (from_template, to_template))
                changelog = "Undid removal of %s=%s in %s" % (
                    removed_param, param_value, to_template)
                pagemsg("Change log = %s" % changelog)
            return newtext, changelog

        page = pywikibot.Page(site, pagename)
        if not page.exists():
            msg("Page %s %s: WARNING, something wrong, does not exist" %
                (index, pagename))
        else:
            blib.do_edit(page,
                         index,
                         undo_one_page_greek_removal,
                         save=save,
                         verbose=verbose)
Esempio n. 41
0
def rewrite_ar_plural(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_ar_plural over the "Arabic plurals" category.

  Each (index, page) pair yielded by blib.cat_articles is passed to
  blib.do_edit with `save` and `verbose` forwarded.
  """
  categories = [u"Arabic plurals"]
  for category in categories:
    articles = blib.cat_articles(category, startFrom, upTo)
    for index, page in articles:
      blib.do_edit(page, index, rewrite_one_page_ar_plural,
          save=save, verbose=verbose)
Esempio n. 42
0
#!/usr/bin/env python
#coding: utf-8
 
import blib, pywikibot, re, string, sys, codecs
from blib import addparam
import arabiclib
 
def fix(page, index, text):
  """Move head= to the front of Arabic headword templates and hide its key.

  Only templates whose name is in arabiclib.arabic_all_headword_templates,
  that have head= and none of the parameters 1 through 8, are touched.
  Returns (text, changelog).
  """
  for template in text.filter_templates():
    if template.name not in arabiclib.arabic_all_headword_templates:
      continue
    if not template.has("head"):
      continue
    if any(template.has(num) for num in range(1, 9)):
      continue

    head = unicode(template.get("head").value)
    template.remove("head")
    # Re-add head= before whatever parameter is now first (or at the end
    # when no parameters are left).
    if len(template.params) > 0:
      first_name = template.params[0].name
    else:
      first_name = None
    addparam(template, "head", head, before=first_name)

    # With the key hidden, the value is written as a positional parameter.
    if template.params[0].name == "head":
      template.get("head").showkey = False

  return text, "ar headword: head= > 1="
 
# Script entry point: run fix() on every item yielded by blib.references()
# for the ar-head "head" tracking template.
startFrom, upTo = blib.parse_args()
 
for index, page in blib.references(u"Template:tracking/ar-head/head", startFrom, upTo):
  # No save= argument is passed here, so do_edit uses its default.
  blib.do_edit(page, index, fix)
Esempio n. 43
0
      msg("Retrieving pages from %s ..." % cat)
      errmsg("Retrieving pages from %s ..." % cat)
      for index, page in blib.cat_articles(cat, None, None):
        yield page.title()

  if params.ignore_lemma_non_lemma:
    pages_to_ignore = set(yield_lemma_non_lemma_page_titles())
  else:
    pages_to_ignore = set()

  for category in yield_cats():
    msg("Processing category %s ..." % category)
    errmsg("Processing category %s ..." % category)
    for index, page in blib.cat_articles(category, startFrom, upTo):
      if page.title() not in pages_to_ignore:
        blib.do_edit(page, index, remove_translit_one_page, save=params.save,
            verbose=params.verbose)

# Script entry point: build the argument parser, parse the command line and
# hand everything to remove_translit().
pa = blib.init_argparser("Remove translit, sc= from hy, xcl, ka, el, grc templates")
pa.add_argument("--langs", default="all",
    help="Languages to do, a comma-separated list or 'all'")
pa.add_argument("--cattype", default="all",
    help="""Categories to examine ('all' or comma-separated list of
'translit', 'lemma', 'non-lemma'; default 'all')""")
pa.add_argument("--ignore-lemma-non-lemma", action="store_true",
    help="""Ignore lemma and non-lemma pages (useful with '--cattype translit').""")
pa.add_argument("--do-head", action="store_true",
    help="""Remove tr= in {{head|..}}""")
params = pa.parse_args()
# params.start / params.end are converted into the (startFrom, upTo) pair.
startFrom, upTo = blib.parse_start_end(params.start, params.end)

remove_translit(params, startFrom, upTo)
Esempio n. 44
0

# Command-line setup.
# NOTE(review): the description string mentions pronunciation sections, while
# this script runs infer_one_page_decls -- it looks copied from another
# script; confirm.
parser = blib.create_argparser(
    "Add pronunciation sections to Russian Wiktionary entries")
# --mockup switches the script to test_infer() instead of editing pages.
parser.add_argument('--mockup',
                    action="store_true",
                    help="Use mocked-up test code")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)
mockup = args.mockup


def ignore_page(page):
    """Return True when `page` is in one of the skipped namespaces.

    `page` is either a title string or an object with a title() method. The
    title is skipped when it starts with "Appendix:", "Appendix talk:",
    "User:", "User talk:" or "Talk:".
    """
    title = page if isinstance(page, basestring) else unicode(page.title())
    return bool(
        re.search(r"^(Appendix|Appendix talk|User|User talk|Talk):", title))


# Either run the mocked-up test, or run infer_one_page_decls on the items
# yielded by blib.references() for every template in decl_templates.
if mockup:
    test_infer()
else:
    for tempname in decl_templates:
        for index, page in blib.references("Template:" + tempname, start, end):
            # Pages for which ignore_page() returns True are only logged.
            if ignore_page(page):
                msg("Page %s %s: Skipping due to namespace" %
                    (index, unicode(page.title())))
            else:
                blib.do_edit(page, index, infer_one_page_decls, save=args.save)
Esempio n. 45
0
parser.add_argument("--comment",
                    help="Comment to use when saving pages.",
                    required=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Read the whole input inside a with-block so that the file handle is closed
# as soon as the text is in memory.
with codecs.open(args.textfile, "r", "utf-8") as textfile:
    fulltext = textfile.read()

# The file alternates title, text, title, text, ..., with the pieces
# separated by runs of four or more newlines.
titles_and_text = re.split(r"\n\n\n\n+", fulltext)

# Raise instead of assert: the check must also run under "python -O".
if len(titles_and_text) % 2 != 0:
    raise ValueError(
        "Expected an even number of title/text chunks in %s, found %s" %
        (args.textfile, len(titles_and_text)))

title_and_text_pairs = []
for i in xrange(0, len(titles_and_text), 2):
    title_and_text_pairs.append((titles_and_text[i], titles_and_text[i + 1]))

for i, (pagetitle, pagetext) in blib.iter_items(title_and_text_pairs,
                                                start,
                                                end,
                                                get_name=lambda x: x[0]):

    # Closes over this iteration's pagetext; it is only called by the
    # do_edit() call directly below.
    def handler(page, index, parsed):
        return process_page(page, index, pagetext,
                            args.comment.decode('utf-8'))

    blib.do_edit(pywikibot.Page(site, pagetitle),
                 i,
                 handler,
                 save=args.save,
                 verbose=args.verbose)
Esempio n. 46
0
def rewrite_arz_headword(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_arz_headword over the Egyptian Arabic adjective and noun categories.

  Each (index, page) pair yielded by blib.cat_articles is passed to
  blib.do_edit with `save` and `verbose` forwarded.
  """
  categories = [u"Egyptian Arabic adjectives", "Egyptian Arabic nouns"]
  for category in categories:
    articles = blib.cat_articles(category, startFrom, upTo)
    for index, page in articles:
      blib.do_edit(page, index, rewrite_one_page_arz_headword,
          save=save, verbose=verbose)
Esempio n. 47
0
def process_page(page, index):
    """Run process_form on the page of every generated form of a Latin noun.

    The Latin section of `page` must contain exactly one {{la-noun}} or
    {{la-proper noun}} template, and it must not be indeclinable; otherwise
    a message is logged and nothing happens. The headword template is turned
    into a {{la-generate-noun-forms}} call, expanded, and every distinct
    form (compared without macrons, and other than the page title itself) is
    handed to blib.do_edit with a process_form handler.
    """
    global args
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

    latin_section = lalib.find_latin_section(unicode(page.text), pagemsg)
    if latin_section is None:
        return
    sections, j, secbody, sectail, has_non_latin = latin_section

    # Find the headword template(s); more than one of a kind is an error.
    saw_noun = None
    saw_proper_noun = None
    for tmpl in blib.parse_text(secbody).filter_templates():
        name = tname(tmpl)
        if name == "la-noun":
            if saw_noun:
                pagemsg(
                    "WARNING: Saw multiple nouns %s and %s, not sure how to proceed, skipping"
                    % (unicode(saw_noun), unicode(tmpl)))
                return
            saw_noun = tmpl
        elif name == "la-proper noun":
            if saw_proper_noun:
                pagemsg(
                    "WARNING: Saw multiple proper nouns %s and %s, not sure how to proceed, skipping"
                    % (unicode(saw_proper_noun), unicode(tmpl)))
                return
            saw_proper_noun = tmpl

    if saw_noun and saw_proper_noun:
        pagemsg(
            "WARNING: Saw both noun and proper noun, can't correct header/headword"
        )
        return
    if not (saw_noun or saw_proper_noun):
        pagemsg(
            "WARNING: Saw neither noun nor proper noun, can't correct header/headword"
        )
        return

    if saw_proper_noun:
        pos = "pn"
        headword = saw_proper_noun
    else:
        pos = "n"
        headword = saw_noun
    if getparam(headword, "indecl"):
        pagemsg("Noun is indeclinable, skipping: %s" % unicode(headword))
        return

    # Work on a re-parsed copy so the template in the section is untouched.
    generate_template = blib.parse_text(
        unicode(headword)).filter_templates()[0]
    blib.set_template_name(generate_template, "la-generate-noun-forms")
    for chain in ("lemma", "m", "f", "g"):
        blib.remove_param_chain(generate_template, chain, chain)
    for param in ("type", "indecl", "id", "pos"):
        rmparam(generate_template, param)

    result = expand_text(unicode(generate_template))
    if not result:
        pagemsg("WARNING: Error generating forms, skipping")
        return
    tempargs = blib.split_generate_args(result)

    # Collect each form once, keyed by its macron-less spelling.
    forms_seen = set()
    slots_and_forms_to_process = []
    for slot, formarg in tempargs.iteritems():
        for form in formarg.split(","):
            if "[" in form or "|" in form:
                continue
            bare_form = lalib.remove_macrons(form)
            if bare_form == pagetitle or bare_form in forms_seen:
                continue
            forms_seen.add(bare_form)
            slots_and_forms_to_process.append((slot, form))

    ordered_forms = sorted(slots_and_forms_to_process,
                           key=lambda pair: lalib.remove_macrons(pair[1]))
    for form_index, (slot, form) in blib.iter_items(ordered_forms):

        # Closes over this iteration's slot/form; it is only called by the
        # do_edit() call directly below.
        def handler(page, index, parsed):
            return process_form(page, index, slot, form, pos)

        form_page = pywikibot.Page(site, lalib.remove_macrons(form))
        blib.do_edit(form_page, form_index, handler,
                     save=args.save, verbose=args.verbose, diff=args.diff)
Esempio n. 48
0
  lineno = 0
  for line in codecs.open(args.direcfile, "r", encoding="utf-8"):
    lineno += 1
    line = line.strip()
    if line.startswith("#"):
      continue
    verb = blib.remove_links(re.sub("<.*?>", "", line))
    verbs[verb] = line
    def do_process_page(page, index, parsed=None):
      pagetitle = unicode(page.title())
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))
      pagetext = blib.safe_page_text(page, pagemsg)
      return process_text_on_page_for_full_conj(index, pagetitle, pagetext, verbs)
    page = pywikibot.Page(site, verb)
    blib.do_edit(page, lineno, do_process_page, save=args.save, verbose=args.verbose, diff=args.diff)
elif args.mode == "generate":
  verbs = {}
  for line in codecs.open(args.direcfile, "r", encoding="utf-8"):
    line = line.strip()
    if line.startswith("#"):
      continue
    if " " not in line:
      errandmsg("WARNING: No space in line: %s" %  line)
      continue
    verb, spec = line.split(" ", 1)
    verbs[verb] = spec
  def do_process_page(page, index):
    return process_page_for_generate(page, index, verbs)
  blib.do_pagefile_cats_refs(args, start, end, do_process_page)
else:
Esempio n. 49
0
from blib import getparam, rmparam, msg, site

def process_page(page, index, parsed):
  """Disabled handler: log a warning and leave the page unchanged.

  The script no longer applies, so this always returns None right after the
  warning. The statements that used to follow the return were unreachable
  and have been removed; they logged "Processing" and returned the page text
  "#REDIRECT [[Module:ru-verb/documentation]]" with the comment
  "redirect to [[Module:ru-verb/documentation]]".
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return None

# Script entry point: run process_page on the documentation subpage of each
# Template:ru-conj-<type> listed below.
parser = blib.create_argparser("Redirect ru-conj-* documentation pages")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Suffixes that are inserted into "Template:ru-conj-%s/documentation".
types = ["7a", "7b", "8a", "8b", "9a", "9b", "10a", "10c", "11a", "11b",
    "12a", "12b", "13b", "14a", "14b", "14c", "15a", "16a", "16b",
    u"irreg-бежать", u"irreg-спать", u"irreg-хотеть", u"irreg-дать",
    u"irreg-есть", u"irreg-сыпать", u"irreg-лгать", u"irreg-мочь",
    u"irreg-слать", u"irreg-идти", u"irreg-ехать", u"irreg-минуть",
    u"irreg-живописать-миновать", u"irreg-лечь", u"irreg-зиждиться",
    u"irreg-клясть", u"irreg-слыхать-видать", u"irreg-стелить-стлать",
    u"irreg-быть", u"irreg-ссать-сцать", u"irreg-чтить", u"irreg-ошибиться",
    u"irreg-плескать", u"irreg-внимать", u"irreg-обязывать"]
for i, ty in blib.iter_items(types, start, end):
  template = "Template:ru-conj-%s/documentation" % ty
  blib.do_edit(pywikibot.Page(site, template), i, process_page, save=args.save,
    verbose=args.verbose, diff=args.diff)
Esempio n. 50
0
def rewrite_ru_decl_adj(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_ru_decl_adj over the "Russian adjectives" category.

  Each (page, index) pair yielded by blib.cat_articles is passed to
  blib.do_edit with `save` and `verbose` forwarded.
  """
  categories = [u"Russian adjectives"]
  for category in categories:
    articles = blib.cat_articles(category, startFrom, upTo)
    for page, index in articles:
      blib.do_edit(page, index, rewrite_one_page_ru_decl_adj,
          save=save, verbose=verbose)
Esempio n. 51
0
          if newname == "he-noun form of" and newspecs:
            if name in ["p", "g", "n"]:
              name = "p" + name
          t.add(name, value, showkey=showkey, preserve_spacing=False)
        # Finally add nocap=1 if requested.
        if add_nocap:
          t.add("nocap", "1")

      if unicode(t) != origt:
        pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))

    text = unicode(parsed)

  return text, notes

# Script entry point: parse the command line, then run process_page on every
# item yielded by blib.references() for each template in
# all_he_form_of_templates.
parser = blib.create_argparser("Clean up {{he-*}} templates")
parser.add_argument('--move-dot', help="Move .= outside of template",
    action="store_true")
parser.add_argument('--rename', help="Rename templates",
    action="store_true")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

for template in all_he_form_of_templates:
  for i, page in blib.references("Template:%s" % template, start, end):
    # The lambda adapts do_edit's (page, index, parsed) callback to
    # process_page, which also takes the two command-line switches.
    blib.do_edit(page, i,
      lambda page, index, parsed:
        process_page(page, index, parsed, args.move_dot, args.rename),
      save=args.save, verbose=args.verbose
    )
        notes.append("convert 3+ newlines to 2")
    return text, notes


# Script entry point: read directives of the form
# "Page N PAGENAME: For noun HEADWORD, declension DECL" and run process_page
# on each named page.
parser = blib.create_argparser("Add missing declension to Latin terms")
parser.add_argument(
    "--direcfile",
    help="File of output directives from make_latin_missing_decl.py",
    required=True)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Read the directives inside a with-block so the file is closed once the
# lines are in memory.
with codecs.open(args.direcfile, "r", "utf-8") as direcfile:
    lines = [x.rstrip('\n') for x in direcfile]
for i, line in blib.iter_items(lines, start, end):
    m = re.search("^Page [0-9]+ (.*?): For noun (.*?), declension (.*?)$",
                  line)
    if not m:
        msg("Unrecognized line, skipping: %s" % line)
    else:
        pagename, headword_template, decl_template = m.groups()

        # Closes over this iteration's templates; it is only called by the
        # do_edit() call directly below.
        def do_process_page(page, index, parsed):
            return process_page(page, index, headword_template, decl_template)

        blib.do_edit(pywikibot.Page(site, pagename),
                     i,
                     do_process_page,
                     save=args.save,
                     verbose=args.verbose,
                     diff=args.diff)
Esempio n. 53
0
def rewrite_idafa(save, verbose, startFrom, upTo):
  """Run rewrite_one_page_idafa over references to every Arabic declension template.

  For each name in arabic_decl_templates, each (page, index) pair yielded by
  blib.references is passed to blib.do_edit with `save` and `verbose`
  forwarded.
  """
  for template in arabic_decl_templates:
    referencing = blib.references("Template:" + template, startFrom, upTo)
    for page, index in referencing:
      blib.do_edit(page, index, rewrite_one_page_idafa,
          save=save, verbose=verbose)
def push_manual_changes(save, verbose, direcfile, annotation, startFrom, upTo):
  """Apply template replacements listed in `direcfile` to the named pages.

  Each line of `direcfile` (UTF-8) must match one of two formats, both of
  which carry a page name, a replacement template and the current template
  ("REPL <- ... (CURR)"). Unparsable lines are reported and skipped, as are
  entries whose replacement equals the current template. On each page every
  occurrence of the current template text is replaced by the replacement.

  save, verbose: forwarded to blib.do_edit.
  annotation: appended in parentheses to the change log message.
  startFrom, upTo: forwarded to blib.iter_pages.
  """
  template_changes = []
  # Open the directive file in a with-block so that it is closed once it has
  # been read, instead of being left to the garbage collector.
  with codecs.open(direcfile, "r", encoding="utf-8") as directives:
    for line in directives:
      line = line.strip()
      m = re.match(r"^Page [^ ]+ (.*?): .*?: (\{\{.*?\}\}) <- \{\{.*?\}\} \((\{\{.*?\}\})\)$",
          line)
      if not m:
        m = re.match(r"^\* (?:Page [^ ]+ )?\[\[(.*?)\]\]: .*?: <nowiki>(\{\{.*?\}\}) <- \{\{.*?\}\} \((\{\{.*?\}\})\)</nowiki>.*$",
            line)
        if not m:
          msg("WARNING: Unable to parse line: [%s]" % line)
          continue
      if m.group(2) != m.group(3):
        # If the current template is the same as the current template of the
        # previous entry, ignore the previous entry; otherwise we won't be
        # able to locate the current template the second time around. This
        # happens e.g. in the output of find_russian_need_vowels.py when
        # processing a template such as cardinalbox or compound that has
        # more than one foreign-language parameter in it.
        if len(template_changes) > 0 and template_changes[-1][2] == m.group(3):
          msg("Ignoring change for pagename %s, %s -> %s" % template_changes[-1])
          template_changes.pop()
        template_changes.append(m.groups())

  for current, index in blib.iter_pages(template_changes, startFrom, upTo,
      # key is the page name
      key = lambda x: x[0]):
    pagename, repl_template, curr_template = current

    # Closes over repl_template/curr_template of this iteration; it is only
    # called by the do_edit() at the bottom of the same iteration.
    def push_one_manual_change(page, index, text):
      def pagemsg(txt):
        msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
      text = unicode(text)
      found_repl_template = repl_template in text
      newtext = text.replace(curr_template, repl_template)
      changelog = ""
      if newtext == text:
        if not found_repl_template:
          pagemsg("WARNING: Unable to locate current template: %s"
              % curr_template)
        else:
          pagemsg("Replacement template already found, taking no action")
      else:
        if found_repl_template:
          pagemsg("WARNING: Made change, but replacement template %s already present!" %
              repl_template)
        # One replacement changes the text length by repl_curr_diff, so a
        # different total means several occurrences were replaced (integer
        # ratio) or something unexpected happened. When repl_curr_diff is 0
        # the total is 0 as well, so the division below is never by zero.
        repl_curr_diff = len(repl_template) - len(curr_template)
        newtext_text_diff = len(newtext) - len(text)
        if newtext_text_diff == repl_curr_diff:
          pass
        else:
          ratio = float(newtext_text_diff) / repl_curr_diff
          if ratio == int(ratio):
            pagemsg("WARNING: Replaced %s occurrences of curr=%s with repl=%s"
                % (int(ratio), curr_template, repl_template))
          else:
            pagemsg("WARNING: Something wrong, length mismatch during replacement: Expected length change=%s, actual=%s, ratio=%.2f, curr=%s, repl=%s"
                % (repl_curr_diff, newtext_text_diff, ratio, curr_template,
                  repl_template))
        changelog = "Replaced %s with %s (%s)" % (curr_template, repl_template,
            annotation)
        pagemsg("Change log = %s" % changelog)
      return newtext, changelog

    page = pywikibot.Page(site, pagename)
    if not page.exists():
      msg("Page %s %s: WARNING, something wrong, does not exist" % (
        index, pagename))
    else:
      blib.do_edit(page, index, push_one_manual_change, save=save,
          verbose=verbose)
Esempio n. 55
0
def split_etymologies(save, verbose, startFrom, upTo):
  """Run split_one_page_etymologies over the "Arabic lemmas" category.

  Each (index, page) pair yielded by blib.cat_articles is passed to
  blib.do_edit with `save` and `verbose` forwarded; `verbose` is also passed
  on to split_one_page_etymologies.
  """
  def edit_one_page(page, index, pagetext):
    # Adapter: do_edit supplies three arguments, the per-page function
    # takes `verbose` as a fourth.
    return split_one_page_etymologies(page, index, pagetext, verbose)

  lemma_articles = blib.cat_articles("Arabic lemmas", startFrom, upTo)
  for index, page in lemma_articles:
    blib.do_edit(page, index, edit_one_page, save=save, verbose=verbose)