Example #1
0
 def fix_quote_usenet_params(t):
   """Rewrite legacy parameters of a Usenet quote template in place.

   When both monthday= and year= are non-blank they are merged into date=,
   unless date= already holds a value, in which case a warning is logged and
   no merge happens. Afterwards the older named and positional parameters
   are handed to move_param() one by one.

   Returns True if the template text differs from what it was on entry.
   """
   before = unicode(t)
   monthday = getparam(t, "monthday").strip()
   year = getparam(t, "year").strip()
   if monthday and year:
     if getparam(t, "date"):
       pagemsg("WARNING: Would set date= but is already present: %s"
           % unicode(t))
     else:
       rmparam(t, "date") # in case of blank param
       # Rename the existing monthday= parameter instead of adding a new one.
       date_param = t.get("monthday")
       date_param.name = "date"
       # A purely numeric "N/N" monthday is joined to the year with a slash,
       # anything else with a space.
       if re.search("^[0-9]+/[0-9]+$", monthday):
         combined = "%s/%s" % (monthday, year)
       else:
         combined = "%s %s" % (monthday, year)
       date_param.value = combined
       rmparam(t, "year")
       pagemsg("monthday/year -> date")
   renames = [
     ("group", "newsgroup"),
     ("text", "passage"),
     ("6", "passage"),
     ("5", "url"),
     ("4", "newsgroup"),
     ("3", "title"),
     ("2", "author"),
     ("1", "date"),
   ]
   for old_name, new_name in renames:
     move_param(t, old_name, new_name)
   return unicode(t) != before
Example #2
0
def process_page(page, index, parsed):
  """Convert head= to 1= in {{ru-phrase}} templates on a page.

  The page is re-parsed with blib.parse(); the incoming `parsed` argument is
  overwritten. A warning is logged for any {{ru-phrase}} that has tr=, and
  for one that has both head= and 1= (which is then left alone).

  Returns (new page text, list of change notes).
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-phrase":
      continue
    if t.has("tr"):
      pagemsg("WARNING: Has tr=: %s" % unicode(t))
    if not t.has("head"):
      continue
    if t.has("1"):
      pagemsg("WARNING: Has both head= and 1=: %s" % unicode(t))
      continue
    notes.append("ru-phrase: convert head= to 1=")
    before = unicode(t)
    headval = getparam(t, "head")
    rmparam(t, "head")
    # tr= is removed and re-added so that it ends up after the new 1=.
    translit = getparam(t, "tr")
    rmparam(t, "tr")
    t.add("1", headval)
    if translit:
      t.add("tr", translit)
    pagemsg("Replacing %s with %s" % (before, unicode(t)))

  return unicode(parsed), notes
Example #3
0
def process_text_on_page(index, pagetitle, text):
    """Drop a redundant 1= from {{es-IPA}}, {{fr-IPA}} and {{it-IPA}}.

    1= is removed when it equals the page title. Templates that have a
    non-empty value in any of 2= through 10= are logged and left untouched.

    Returns None when the text mentions none of the three template names,
    otherwise (new text, list of change notes).
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    ipa_templates = ["es-IPA", "fr-IPA", "it-IPA"]
    notes = []

    # Cheap substring test before paying for a full parse.
    if not any(name in text for name in ipa_templates):
        return

    parsed = blib.parse_text(text)

    for t in parsed.filter_templates():
        tn = tname(t)
        if tn not in ipa_templates:
            continue
        origt = unicode(t)
        # First numbered param from 2 to 10 that has a value, if any.
        extra = next(
            (i for i in xrange(2, 11) if getparam(t, str(i))), None)
        if extra is not None:
            pagemsg("Template has %s=, not touching: %s" % (extra, origt))
            continue
        par1 = getparam(t, "1")
        if par1 == pagetitle:
            rmparam(t, "1")
            notes.append("remove redundant 1=%s from {{%s}}" % (par1, tn))
        if unicode(t) != origt:
            pagemsg("Replaced %s with %s" % (origt, unicode(t)))

    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Disabled script: log a warning and return None without editing.

    Everything after the bare `return` below is unreachable. It is the old
    logic that rewrote {{ru-conj}} class 6 arguments to the degree-sign
    forms, kept in place for whoever fixes the script up.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("WARNING: Script no longer applies and would need fixing up")
    return

    # ---- unreachable from here on ----
    pagemsg("Processing")

    text = unicode(page.text)
    parsed = blib.parse(page)
    notes = []
    for t in parsed.filter_templates():
        origt = unicode(t)
        param1 = getparam(t, "1")
        if unicode(t.name) in ["ru-conj"]:
            # 6a/6c are rewritten only when no_iotation= is set; 6b always.
            if re.search(r"^6[ac]", param1):
                if getparam(t, "no_iotation"):
                    rmparam(t, "no_iotation")
                    if param1.startswith("6a"):
                        notes.append(u"6a + no_iotation -> 6°a")
                    else:
                        notes.append(u"6c + no_iotation -> 6°c")
                    t.add("1", re.sub("^6", u"6°", param1))
            elif re.search(r"^6b", param1):
                notes.append(u"6b -> 6°b")
                t.add("1", re.sub("^6", u"6°", param1))
        newt = unicode(t)
        if origt != newt:
            pagemsg("Replaced %s with %s" % (origt, newt))

    return unicode(parsed), notes
Example #5
0
def process_page(index, page, save, verbose):
  """Tally the Russian headword templates found on a page.

  A template counts as a headword if its name is in ru_head_templates, or if
  it is {{head|ru|...}} (recorded as "head|ru|<type>"). Each one increments
  both cat_head_count and overall_head_count. A warning is logged for head
  types listed in ru_heads_to_warn_about and for pages with no headword.
  output_heads_seen() is called whenever index is a multiple of 100.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)
  found_page_head = False
  for t in parsed.filter_templates():
    template_name = unicode(t.name)
    headname = None
    if template_name in ru_head_templates:
      headname = template_name
    elif template_name == "head" and getparam(t, "1") == "ru":
      headtype = getparam(t, "2")
      headname = "head|ru|%s" % headtype
      if headtype in ru_heads_to_warn_about:
        pagemsg("WARNING: Found %s" % headname)
    if headname is None:
      continue
    for counts in (cat_head_count, overall_head_count):
      counts[headname] = counts.get(headname, 0) + 1
    found_page_head = True
  if not found_page_head:
    pagemsg("WARNING: No head")
  if index % 100 == 0:
    output_heads_seen()
Example #6
0
def canon_param(pagetitle, index, template, param, paramtr, translit_module,
    include_tempname_in_changelog=False):
  """Canonicalize one foreign-text parameter and its transliteration.

  `param` is either a single parameter name or a [from, to] list; the
  special source name "page title" means the page title is used as the
  foreign text. The canonical values come from do_canon_param(): a truthy
  foreign value is written to the destination parameter, a Latin value
  equal to True means "remove paramtr", and any other truthy Latin value is
  written to paramtr.

  Returns False when there is no foreign text, otherwise the `actions`
  value produced by do_canon_param().
  """
  if isinstance(param, list):
    fromparam, toparam = param
  else:
    fromparam = toparam = param
  if fromparam == "page title":
    foreign = pagetitle
  else:
    foreign = getparam(template, fromparam)
  latin = getparam(template, paramtr)
  if not foreign:
    return False
  canonforeign, canonlatin, actions = do_canon_param(pagetitle, index,
      template, fromparam, toparam, paramtr, foreign, latin, translit_module,
      include_tempname_in_changelog)
  # Snapshot taken after do_canon_param() and before any edit below.
  oldtempl = "%s" % unicode(template)
  if canonforeign:
    addparam(template, toparam, canonforeign)
  if canonlatin:
    if canonlatin == True:
      template.remove(paramtr)
    else:
      addparam(template, paramtr, canonlatin)
  if canonforeign or canonlatin:
    msg("Page %s %s: Replaced %s with %s" % (index, pagetitle,
      oldtempl, unicode(template)))
  return actions
Example #7
0
def process_text_on_page_for_full_conj(index, pagename, text, verbs):
  """Insert the conjugation spec from `verbs` into Spanish verb headwords.

  `verbs` maps page names to conjugation entries. An entry equal to the
  default "<first word><> [[word]] ..." form becomes blank; an entry that
  only differs from the default inside <...> has its links removed.
  {{es-verb}} templates flagged with attn= get the entry as 1=, and
  {{head|es|verb}} templates are converted to {{es-verb}}.

  Returns None if pagename is not in `verbs`, otherwise
  (new text, list of change notes).
  """
  global args
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagename, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagename, txt))

  pagemsg("Processing")

  notes = []

  if pagename not in verbs:
    pagemsg("WARNING: Can't find entry, skipping")
    return

  origentry = verbs[pagename]
  entry = origentry
  # NOTE(review): unpacking into two values means a page name without a
  # space raises ValueError here.
  first, rest = pagename.split(" ", 1)
  linked_rest = " ".join("[[%s]]" % word for word in rest.split(" "))
  def_link = "%s<> %s" % (first, linked_rest)
  if def_link == entry:
    pagemsg("Replacing entry '%s' with a blank entry because it's the default" % entry)
    entry = ""
  elif re.sub("<.*?>", "<>", entry) == def_link:
    unlinked = blib.remove_links(entry)
    pagemsg("Replacing entry '%s' with entry without links '%s'" % (entry, unlinked))
    entry = unlinked

  parsed = blib.parse_text(text)
  for t in parsed.filter_templates():
    tn = tname(t)
    origt = unicode(t)
    if tn == "es-verb":
      if not getparam(t, "attn"):
        pagemsg("Didn't see attn=1: %s" % unicode(t))
        continue
      rmparam(t, "attn")
      if entry:
        t.add("1", entry)
        notes.append("add conjugation '%s' to Spanish verb" % entry)
      else:
        notes.append("add conjugation (default) to Spanish verb")
    elif tn == "head" and getparam(t, "1") == "es" and getparam(t, "2") == "verb":
      head = getparam(t, "head")
      if head:
        pagemsg("WARNING: Removing head=%s compared with entry '%s', original entry '%s': %s" %
            (head, entry, origentry, unicode(t)))
        rmparam(t, "head")
      rmparam(t, "2")
      rmparam(t, "1")
      blib.set_template_name(t, "es-verb")
      if entry:
        t.add("1", entry)
        notes.append("convert {{head|es|verb}} to {{es-verb|%s}}" % entry)
      else:
        notes.append("convert {{head|es|verb}} to {{es-verb}}")
    if origt != unicode(t):
      pagemsg("Replaced %s with %s" % (origt, unicode(t)))

  return unicode(parsed), notes
  def canonicalize_one_page_verb_form(page, index, text):
    """Canonicalize the verb-form argument of every `tempname` template.

    `tempname`, `formarg` and canonicalize_form() come from the enclosing
    scope. For each matching template a non-empty form is rewritten through
    canonicalize_form() and an action string is recorded; for forms matching
    "1"/"I" the two parameters after `formarg` are included in the record.

    Returns (text, changelog string).
    """
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    actions_taken = []

    matching = [tmpl for tmpl in text.filter_templates()
                if tmpl.name == tempname]
    for template in matching:
      before = unicode(template)
      form = getparam(template, formarg)
      if form:
        addparam(template, formarg, canonicalize_form(form))
      after = unicode(template)
      if before != after:
        msg("Replacing %s with %s" % (before, after))
      if re.match("^[1I](-|$)", form):
        # formarg is a numeric parameter name here; the next two
        # positional parameters are reported alongside the form.
        base = int(formarg)
        action = "form=%s (%s/%s)" % (form,
          getparam(template, str(1+base)),
          getparam(template, str(2+base)))
      else:
        action = "form=%s" % form
      actions_taken.append(action)
    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
        tempname, formarg, '; '.join(actions_taken))
    if actions_taken:
      msg("Change log = %s" % changelog)
    return text, changelog
Example #9
0
 def do_comparative_superlative_of(pos, existing_t, should_end):
     """Validate a {{<pos> of}} template and fix up the head template.

     Uses t, origt, headt, pagetitle and notes from the enclosing scope.
     Logs a warning and returns False when the language is not "de", when
     `existing_t` is already set, when there is no head template, or when
     the page title does not end in `should_end`. Otherwise sets 2= of the
     head template to "<pos> adjective" if needed and returns t.
     """
     def skip(message):
         pagemsg(message)
         return False

     if getparam(t, "1") != "de":
         return skip(
             "WARNING: Saw wrong language in {{%s of}}, skipping: %s" %
             (pos, origt))
     if existing_t:
         return skip(
             "WARNING: Saw two {{%s of}} templates, skipping: %s and %s"
             % (pos, unicode(existing_t), origt))
     if not headt:
         return skip(
             "WARNING: Saw {{%s of}} without head template, skipping: %s"
             % (pos, origt))
     if not pagetitle.endswith(should_end):
         return skip(
             "WARNING: Incorrect ending for %s, should be -%s, skipping"
             % (pos, should_end))
     wanted = "%s adjective" % pos
     current = getparam(headt, "2")
     if current != wanted:
         headt.add("2", wanted)
         notes.append(
             "convert {{head|de|%s}} to {{head|de|%s adjective}}" %
             (current, pos))
     return t
Example #10
0
def process_page(page, index, parsed):
    """Strip "-refl" from the verb type of Russian conjugation templates.

    For the ru-conj family the verb type is read from 1=; for
    {{temp|ru-conj|...}} it is read from 2=. The parameter is always
    rewritten with t.add(), and a note is recorded only when the template
    text actually changed.

    Returns (parsed, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errpagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))
        errmsg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    conj_templates = [
        "ru-conj", "ru-conj-old", "User:Benwing2/ru-conj",
        "User:Benwing2/ru-conj-old"
    ]
    notes = []
    for t in parsed.filter_templates():
        before = unicode(t)
        name = tname(t)
        # Which parameter carries the verb type for this template, if any.
        if name in conj_templates:
            type_param = "1"
        elif name == "temp" and getparam(t, "1") == "ru-conj":
            type_param = "2"
        else:
            type_param = None
        if type_param is not None:
            t.add(type_param, getparam(t, type_param).replace("-refl", ""))
        after = unicode(t)
        if before != after:
            notes.append("remove -refl from verb type")
            pagemsg("Replaced %s with %s" % (before, after))

    return parsed, notes
def find_head_comp_sup(pagetitle, pagemsg):
    """Return (head, comparative, superlative) from a page's {{la-adv}}.

    Only the first {{la-adv}} template is considered. Explicit comp=/2= and
    sup=/3= values win; a missing one is derived from the headword by
    stripping a recognized ending and adding "ius" / "issime" (with macron).
    If the ending is not recognized a warning is logged and the values are
    returned as found. Returns (None, None, None) when the page has no
    {{la-adv}}.
    """
    page = pywikibot.Page(site, pagetitle)
    parsed = blib.parse_text(unicode(page.text))
    endings = ["iter", "nter", "ter", "er", u"iē", u"ē", "im", u"ō"]
    for t in parsed.filter_templates():
        if tname(t) != "la-adv":
            continue
        head = getparam(t, "1")
        comp = getparam(t, "comp") or getparam(t, "2")
        sup = getparam(t, "sup") or getparam(t, "3")
        if comp and sup:
            return head, comp, sup
        stem = None
        for suff in endings:
            m = re.search("^(.*?)%s$" % suff, head)
            if m:
                stem = m.group(1)
                # For -nter adverbs the "nt" is put back onto the stem.
                if suff == "nter":
                    stem += "nt"
                break
        if stem is None:
            pagemsg(
                "WARNING: Didn't recognize ending of adverb headword %s"
                % head)
            return head, comp, sup
        return head, comp or stem + "ius", sup or stem + u"issimē"
    return None, None, None
Example #12
0
def process_page(page, index, parsed):
  """Replace GR with AC in Bulgarian headword parameters.

  The page text is re-parsed; the incoming `parsed` argument is overwritten.
  For the bg-* headword templates the value of 1= is checked, for
  {{head|bg|...}} the value of head=. The value is passed through
  bglib.decompose() and, if it contains GR, rewritten with GR replaced by AC.

  Returns (new page text, list of change notes).
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  notes = []
  bg_head_templates = [
    "bg-noun", "bg-proper noun", "bg-verb", "bg-adj", "bg-adv",
    "bg-part", "bg-part form", "bg-verbal noun", "bg-verbal noun form",
    "bg-phrase",
  ]

  text = unicode(page.text)
  parsed = blib.parse_text(text)
  for t in parsed.filter_templates():
    tn = tname(t)
    origt = unicode(t)
    if tn in bg_head_templates:
      param = "1"
    elif tn == "head" and getparam(t, "1") == "bg":
      param = "head"
    else:
      param = None
    if param:
      decomposed = bglib.decompose(getparam(t, param))
      if GR in decomposed:
        t.add(param, decomposed.replace(GR, AC))
        notes.append("convert grave to acute in {{%s}}" % tn)
    if unicode(t) != origt:
      pagemsg("Replaced %s with %s" % (origt, unicode(t)))
  return unicode(parsed), notes
def process_page(page, index, parsed):
  """Move Latin second-declension stems in -i to the -ius/-ium templates.

  {{la-decl-2nd}} becomes {{la-decl-2nd-ius}} and {{la-decl-2nd-N}} becomes
  {{la-decl-2nd-N-ium}} when the stem in 1= ends in "i"; the final "i" is
  dropped from the stem. Other stems only produce a warning.

  Returns (new page text, list of change notes).
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  notes = []

  # old template name -> (new name, change note, warning format)
  conversions = {
    "la-decl-2nd": (
      "la-decl-2nd-ius",
      "Fix noun in -ius to use {{la-decl-2nd-ius}}",
      "WARNING: Found la-decl-2nd without stem in -i: %s",
    ),
    "la-decl-2nd-N": (
      "la-decl-2nd-N-ium",
      "Fix noun in -ium to use {{la-decl-2nd-N-ium}}",
      "WARNING: Found la-decl-2nd-N without stem in -i: %s",
    ),
  }

  for t in parsed.filter_templates():
    conversion = conversions.get(tname(t))
    if conversion is None:
      continue
    new_name, note, warning = conversion
    stem = getparam(t, "1")
    if not stem.endswith("i"):
      pagemsg(warning % unicode(t))
      continue
    blib.set_template_name(t, new_name)
    t.add("1", stem[:-1])
    notes.append(note)

  return unicode(parsed), notes
Example #14
0
 def do_one_page_verb(page, index, text):
   """Remove i3rab from the vn= values of {{ar-conj}} templates.

   vn= may hold several verbal nouns separated by an ASCII or Arabic comma
   and may end in "?" to mark uncertainty. Each verbal noun is passed
   through remove_i3rab(), the results are re-joined with "," and the "?"
   is restored.

   The rebuilt value is compared with the ORIGINAL vn= value, "?" included.
   Comparing with the "?"-stripped value made every uncertain verbal noun
   look changed, which produced a bogus log line and changelog entry.

   Returns (text, changelog string listing the verbs that were changed).
   """
   pagename = page.title()
   verbcount = 0
   verbids = []
   for template in text.filter_templates():
     if template.name == "ar-conj":
       verbcount += 1
       origvn = getparam(template, "vn")
       vnvalue = origvn
       uncertain = False
       if vnvalue.endswith("?"):
         vnvalue = vnvalue[:-1]
         msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
           index, pagename))
         uncertain = True
       if not vnvalue:
         continue
       vns = re.split(u"[,،]", vnvalue)
       form = getparam(template, "1")
       verbid = "#%s form %s" % (verbcount, form)
       if re.match("^[1I](-|$)", form):
         verbid += " (%s,%s)" % (getparam(template, "2"), getparam(template, "3"))
       newvn = ",".join(
         [remove_i3rab(pagename, index, verbid, vn) for vn in vns])
       if uncertain:
         newvn += "?"
       if newvn != origvn:
         msg("Page %s %s: Verb %s, replacing %s with %s" % (
           index, pagename, verbid, origvn, newvn))
         addparam(template, "vn", newvn)
         verbids.append(verbid)
   return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
         ', '.join(verbids))
 def undo_one_page_greek_removal(page, index, text):
   """Put back a parameter that was removed from a template on this page.

   `template_text` and `removed_param` come from the enclosing scope.
   Three renderings are built from template_text: the original, the "to"
   form (original minus sc=polytonic) and the "from" form ("to" minus
   removed_param). Every occurrence of the "from" form in the page text is
   replaced with the "to" form.

   Returns (new text, changelog); the changelog is "" if nothing changed.
   """
   def pagemsg(txt):
     msg("Page %s %s: %s" % (index, unicode(page.title()), txt))

   template = blib.parse_text(template_text).filter_templates()[0]
   orig_template = unicode(template)
   if getparam(template, "sc") == "polytonic":
     template.remove("sc")
   to_template = unicode(template)
   param_value = getparam(template, removed_param)
   template.remove(removed_param)
   from_template = unicode(template)

   text = unicode(text)
   orig_present = orig_template in text
   newtext = text.replace(from_template, to_template)

   if newtext == text:
     if orig_present:
       pagemsg("Original template found, taking no action")
     else:
       pagemsg("WARNING: Unable to locate 'from' template when undoing Greek param removal: %s"
           % from_template)
     return newtext, ""

   if orig_present:
     pagemsg("WARNING: Undid removal, but original template %s already present!" %
         orig_template)
   # A single replacement changes the length by exactly this much.
   expected_growth = len(to_template) - len(from_template)
   if len(newtext) - len(text) != expected_growth:
     pagemsg("WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s" % (
       from_template, to_template))
   changelog = "Undid removal of %s=%s in %s" % (removed_param,
       param_value, to_template)
   pagemsg("Change log = %s" % changelog)
   return newtext, changelog
def rewrite_one_page_verb_headword(page, index, text):
  """Turn form= of {{ar-verb}} into 1= and shift numbered params up by one.

  For each {{ar-verb}} with a non-empty form=, params 1..10 are renamed to
  2..11, and form= is re-added first with its value passed through
  canonicalize_form(), then renamed to an unnamed 1=. An action string is
  recorded for every {{ar-verb}} template, including ones without form=.

  Returns (text, changelog string).
  """
  pagetitle = page.title()
  msg("Processing page %s" % pagetitle)
  actions_taken = []

  for template in text.filter_templates():
    if template.name in ["ar-verb"]:
      origtemp = unicode(template)
      form = getparam(template, "form")
      if form:
        # In order to keep in the same order, just forcibly change the
        # param "names" (numbers)
        # Walk from 10 down to 1 so a renamed param never collides with
        # one that has not been renamed yet.
        for pno in xrange(10, 0, -1):
          if template.has(str(pno)):
            template.get(str(pno)).name = str(pno + 1)
        # Make sure form= param is first ...
        template.remove("form")
        addparam(template, "form", canonicalize_form(form), before=template.params[0].name if len(template.params) > 0 else None)
        # ... then forcibly change its name to 1=
        template.get("form").name = "1"
        # showkey = False makes it render as a positional argument.
        template.get("1").showkey = False
      newtemp = unicode(template)
      if origtemp != newtemp:
        msg("Replacing %s with %s" % (origtemp, newtemp))
      # `form` is the pre-canonicalization value; form I also reports the
      # two params that now sit at 2= and 3=.
      if re.match("^[1I](-|$)", form):
        actions_taken.append("form=%s (%s/%s)" % (form,
          getparam(template, "2"), getparam(template, "3")))
      else:
        actions_taken.append("form=%s" % form)
  changelog = "ar-verb: form= -> 1= and canonicalize to Roman numerals, move other params up: %s" % '; '.join(actions_taken)
  if len(actions_taken) > 0:
    msg("Change log = %s" % changelog)
  return text, changelog
Example #17
0
 def fix_cite_book_params(t):
   """Rewrite legacy parameters of a book citation template in place.

   When both origyear= and year= are non-blank, year= is renamed to
   year_published= and origyear= to year=, unless year_published= already
   has a value (then only a warning is logged). origdate=/origmonth= are
   moved to date=/month=, id= is moved to isbn= with any leading ISBN label
   stripped, and fix_page_params() is applied.

   Returns True if the template text differs from what it was on entry.
   """
   before = unicode(t)
   origyear = getparam(t, "origyear").strip()
   year = getparam(t, "year").strip()
   if origyear and year:
     if getparam(t, "year_published"):
       pagemsg("WARNING: Would set year_published= but is already present: %s"
           % unicode(t))
     else:
       rmparam(t, "year_published") # in case of blank param
       # year= must be renamed first so that origyear= can take its name.
       t.get("year").name = "year_published"
       t.get("origyear").name = "year"
       pagemsg("year -> year_published, origyear -> year")
   move_param(t, "origdate", "date")
   move_param(t, "origmonth", "month")

   isbn_label = re.compile(
     r"^(\s*)(10-ISBN +|ISBN-13 +|ISBN:? +|ISBN[-=] *)", re.I)

   def frob_isbn(idval):
     # Strip a leading ISBN label (keeping leading whitespace), accept a
     # value that starts with a digit as is, and reject anything else.
     if isbn_label.search(idval):
       return isbn_label.sub(r"\1", idval)
     if re.search(r"^[0-9]", idval.strip()):
       return idval
     pagemsg("WARNING: Would replace id= -> isbn= but id=%s doesn't begin with 'ISBN '" %
         idval.replace("\n", r"\n"))
     return None

   move_param(t, "id", "isbn", frob_isbn)
   fix_page_params(t)
   return unicode(t) != before
Example #18
0
 def sub_template(val):
     """Expand {{{1}}}, {{{2}}} and {{{pp|default}}} placeholders in `val`.

     Values are read from `template` in the enclosing scope. {{{1}}} and
     {{{2}}} (with or without a trailing "|") become the template's 1= and
     2=; {{{pp|X}}} becomes pp= if non-empty, else the default X.

     All replacements are given to re.sub() as callables so the parameter
     text is inserted literally. A plain replacement string would have any
     backslash in the value processed as an escape or group reference.
     """
     val = re.sub(r"\{\{\{1\|?\}\}\}",
                  lambda m: getparam(template, "1"),
                  val)
     val = re.sub(r"\{\{\{2\|?\}\}\}",
                  lambda m: getparam(template, "2"),
                  val)
     val = re.sub(r"\{\{\{pp\|(.*?)\}\}\}",
                  lambda m: getparam(template, "pp") or m.group(1),
                  val)
     return val
 def replace_spenser_fq(m):
     """Regex callback: rewrite an {{RQ:Spenser FQ}} call plus its text.

     The match has two groups: the template call and the quoted text.
     Positional 2= and 1= become canto= and book= via arabic_to_roman();
     if either conversion yields a false value the original match text is
     returned unchanged. The text has <br> tags turned into " / ", loses a
     surrounding {{quote|en|...}} wrapper and is stored as passage=.

     Returns the rewritten template followed by a newline.
     """
     template, text = m.groups()
     parsed = blib.parse_text(template)
     t = list(parsed.filter_templates())[0]
     # 2= is handled before 1=, as in the template's original conversion
     # order.
     for positional, named in (("2", "canto"), ("1", "book")):
         arabic = getparam(t, positional)
         if not arabic:
             continue
         roman = arabic_to_roman(arabic)
         if not roman:
             return m.group(0)
         t.add(named, roman, before=positional)
         rmparam(t, positional)
     text = re.sub(r"\s*<br */?>\s*", " / ", text)
     text = re.sub(r"^\{\{quote\|en\|(.*)\}\}$", r"\1", text)
     t.add("passage", text)
     blib.set_template_name(t, "RQ:Spenser Faerie Queene")
     notes.append(
         "reformat {{RQ:Spenser FQ}} into {{RQ:Spenser Faerie Queene}}")
     return unicode(t) + "\n"
Example #20
0
def canon_param(pagetitle,
                index,
                template,
                param,
                paramtr,
                include_tempname_in_changelog=False):
    """Canonicalize one Arabic parameter and its transliteration.

    `param` is a parameter name or a [from, to] list; the special source
    name "page title" takes the Arabic text from the page title. The
    canonical values come from do_canon_param(): a truthy Arabic value is
    written to the destination parameter, a Latin value equal to True means
    "remove paramtr", any other truthy Latin value is written to paramtr.

    Returns False when there is no Arabic text, otherwise the `actions`
    value produced by do_canon_param().
    """
    if isinstance(param, list):
        fromparam, toparam = param
    else:
        fromparam = toparam = param
    if fromparam == "page title":
        arabic = pagetitle
    else:
        arabic = getparam(template, fromparam)
    latin = getparam(template, paramtr)
    if not arabic:
        return False
    canonarabic, canonlatin, actions = do_canon_param(
        pagetitle, index, template, fromparam, toparam, paramtr, arabic, latin,
        include_tempname_in_changelog)
    # Snapshot taken after do_canon_param() and before any edit below.
    oldtempl = "%s" % unicode(template)
    if canonarabic:
        addparam(template, toparam, canonarabic)
    if canonlatin:
        if canonlatin == True:
            template.remove(paramtr)
        else:
            addparam(template, paramtr, canonlatin)
    if canonarabic or canonlatin:
        msg("Page %s %s: Replaced %s with %s" %
            (index, pagetitle, oldtempl, unicode(template)))
    return actions
Example #21
0
def canon_param(pagetitle, index, template, lang, param, paramtr,
                translit_module):
    """Canonicalize one foreign-text parameter and its transliteration.

    `param` is a parameter name or a [from, to] list; the special source
    name "page title" takes the foreign text from the page title. The
    canonical values come from do_canon_param(): a truthy foreign value is
    stored with add_param_handling_head(), a Latin value equal to True
    means "remove paramtr", any other truthy Latin value is written to
    paramtr.

    Returns False when there is no foreign text, otherwise the `actions`
    value produced by do_canon_param().
    """
    if isinstance(param, list):
        fromparam, toparam = param
    else:
        fromparam = toparam = param
    if fromparam == "page title":
        foreign = pagetitle
    else:
        foreign = getparam(template, fromparam)
    latin = getparam(template, paramtr)
    if not foreign:
        return False
    canon_result = do_canon_param(pagetitle, index, template, lang,
                                  fromparam, toparam, paramtr, foreign,
                                  latin, translit_module)
    canonforeign, canonlatin, actions = canon_result
    # Snapshot taken after do_canon_param() and before any edit below.
    oldtempl = "%s" % unicode(template)
    if canonforeign:
        add_param_handling_head(template, toparam, canonforeign)
    if canonlatin:
        if canonlatin == True:
            template.remove(paramtr)
        else:
            addparam(template, paramtr, canonlatin)
    if canonforeign or canonlatin:
        msg("Page %s %s: Replaced %s with %s" %
            (index, pagetitle, oldtempl, unicode(template)))
    return actions
def process_page(page, index, parsed):
    """Move head= to 1= in {{ang-adj}} templates.

    The page text is re-parsed; the incoming `parsed` argument is
    overwritten. Warnings are logged for {{head|ang|adjective(s)}} and for
    an {{ang-adj}} that already has 1=.

    NOTE(review): the change note is appended for every {{ang-adj}} without
    1=, whether or not head= was present.

    Returns (parsed, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    notes = []

    text = unicode(page.text)
    parsed = blib.parse_text(text)
    for t in parsed.filter_templates():
        tn = tname(t)
        before = unicode(t)
        is_adj_head = (tn == "head" and getparam(t, "1") == "ang"
                       and getparam(t, "2") in ["adjective", "adjectives"])
        if is_adj_head:
            pagemsg("WARNING: {{head}} for adjectives, should not occur: %s" %
                    unicode(t))
        elif tn == "ang-adj":
            if getparam(t, "1"):
                pagemsg("WARNING: 1= in ang-adj, should not occur: %s" %
                        unicode(t))
            else:
                headval = getparam(t, "head")
                rmparam(t, "head")
                if headval:
                    t.add("1", headval)
                notes.append("move head= to 1= in {{ang-adj}}")
        if unicode(t) != before:
            pagemsg("Replaced %s with %s" % (before, unicode(t)))
    return parsed, notes
 def replace_trans(m, newlangcode, newlangname):
     """Regex callback: retarget "ku" translation templates to a new code.

     The match has two groups: a prefix and the translation text. In the
     translation text, templates listed in trans_templates with 1=ku get
     1= set to `newlangcode` and sc= removed. {{t-simple|ku}} gets 1= and
     langname= updated, but only if langname= is "Kurdish"; otherwise a
     warning is logged and the template is left alone.

     The langname test used to read getparam(t, "langname" != "Kurdish"),
     which looked up a parameter named True and so never warned.

     Returns the prefix followed by the rewritten translation text.
     """
     prefix, transtext = m.groups()
     parsed = blib.parse_text(transtext)
     for t in parsed.filter_templates():
         origt = unicode(t)
         tn = tname(t)
         if tn in trans_templates:
             if getparam(t, "1") == "ku":
                 t.add("1", newlangcode)
                 rmparam(t, "sc")
                 pagemsg(
                     "Replaced %s with %s based on language prefix of translation entry"
                     % (origt, unicode(t)))
                 notes.append(
                     "{{%s|ku}} -> {{%s|%s}} based on language prefix of translation entry"
                     % (tn, tn, newlangcode))
         elif tn == "t-simple":
             if getparam(t, "1") == "ku":
                 if getparam(t, "langname") != "Kurdish":
                     pagemsg(
                         "WARNING: Something wrong, t-simple|ku without langname=Kurdish: %s"
                         % unicode(t))
                 else:
                     t.add("1", newlangcode)
                     t.add("langname", newlangname)
                     pagemsg("Replaced %s with %s based on prefix" %
                             (origt, unicode(t)))
                     notes.append(
                         "{{t-simple|ku|langname=Kurdish}} -> {{t-simple|%s|langname=%s}} based on language prefix"
                         % (newlangcode, newlangname))
     transtext = unicode(parsed)
     return prefix + transtext
Example #24
0
def process_page(page, index, parsed):
    """Simplify the parameters of {{R:Lexico}} templates.

    lang= is removed; entry_uk=, url_uk= and 4= are renamed to entry=, url=
    and text= respectively (the new parameter is inserted before the old
    one, which is then removed). Empty old parameters are just removed.

    Returns (parsed, list of change notes).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    # (old parameter, new parameter), processed in this order.
    renames = [("entry_uk", "entry"), ("url_uk", "url"), ("4", "text")]

    notes = []
    for t in parsed.filter_templates():
        if tname(t) != "R:Lexico":
            continue
        before = unicode(t)
        rmparam(t, "lang")
        for old_name, new_name in renames:
            value = getparam(t, old_name)
            if value:
                t.add(new_name, value, before=old_name)
            rmparam(t, old_name)
        after = unicode(t)
        if before != after:
            notes.append("Remove/rearrange params in {{R:Lexico}}")
            pagemsg("Replaced %s with %s" % (before, after))

    return parsed, notes
def process_page(index, page, save, verbose):
  """Warn when a page lacks {{head|ru|verb form}} or {{inflection of}}.

  Read-only check: nothing is edited or saved. When either template is
  missing, the "# ..." definition lines of the ==Russian== section are
  collected and appended to the warning. Pages with more than one
  ==Russian== section are skipped with a warning.

  `deflines` starts as "" so the warnings still work on a page with no
  ==Russian== section; it used to be unbound there and raised NameError.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  found_inflection_of = False
  found_head_verb_form = False
  for t in parsed.filter_templates():
    if unicode(t.name) in ["inflection of"]:
      found_inflection_of = True
    if unicode(t.name) == "head" and getparam(t, "1") == "ru" and getparam(t, "2") == "verb form":
      found_head_verb_form = True

  if found_head_verb_form and found_inflection_of:
    return

  # Find definition line
  deflines = ""
  foundrussian = False
  # The capturing group keeps the headers, so sections alternate
  # text/header/text/...; odd indices are headers, even ones their bodies.
  sections = re.split("(^==[^=]*==\n)", text, 0, re.M)

  for j in xrange(2, len(sections), 2):
    if sections[j-1] == "==Russian==\n":
      if foundrussian:
        pagemsg("WARNING: Found multiple Russian sections, skipping page")
        return
      foundrussian = True

      # Joined with a literal backslash-n so the log entry stays on one line.
      deflines = r"\n".join(re.findall(r"^(# .*)$", sections[j], re.M))

  if not found_head_verb_form:
    pagemsg("WARNING: No {{head|ru|verb form}}: %s" % deflines)
  if not found_inflection_of:
    pagemsg("WARNING: No 'inflection of': %s" % deflines)
Example #26
0
 def fix_quote_usenet_params(t):
   """Bring a Usenet quote template's parameters up to date, in place.

   If monthday= and year= are both non-blank they are combined into date=
   (a warning is logged instead when date= already has a value). The
   remaining legacy named and positional parameters are then passed to
   move_param().

   Returns True when the template text was changed.
   """
   snapshot = unicode(t)
   monthday = getparam(t, "monthday").strip()
   year = getparam(t, "year").strip()
   if monthday and year:
     if getparam(t, "date"):
       pagemsg("WARNING: Would set date= but is already present: %s"
           % unicode(t))
     else:
       rmparam(t, "date") # in case of blank param
       # monthday= itself is renamed to date= and given the merged value.
       merged = t.get("monthday")
       merged.name = "date"
       numeric = re.search("^[0-9]+/[0-9]+$", monthday)
       merged.value = (
         "%s/%s" % (monthday, year) if numeric
         else "%s %s" % (monthday, year))
       rmparam(t, "year")
       pagemsg("monthday/year -> date")
   for source, dest in (
       ("group", "newsgroup"),
       ("text", "passage"),
       ("6", "passage"),
       ("5", "url"),
       ("4", "newsgroup"),
       ("3", "title"),
       ("2", "author"),
       ("1", "date")):
     move_param(t, source, dest)
   return snapshot != unicode(t)
Example #27
0
def process_page(index, page, save, verbose):
  """Move 1= to phon= in {{ru-IPA}} templates and optionally save the page.

  Templates that already have phon= are logged and left alone. If the page
  text changed it is saved when `save` is true; otherwise the would-be
  save is only logged. With `verbose` the full old and new text is logged.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  text = unicode(page.text)
  parsed = blib.parse(page)

  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-IPA":
      continue
    before = unicode(t)
    if getparam(t, "phon"):
      pagemsg("phon= already present: %s" % unicode(t))
      continue
    phon = getparam(t, "1")
    pagemsg("Adding phon=: %s" % unicode(t))
    rmparam(t, "1")
    t.add("phon", phon)
    pagemsg("Replaced %s with %s" % (before, unicode(t)))

  newtext = unicode(parsed)

  if newtext == text:
    pagemsg("Skipping")
    return

  if verbose:
    pagemsg("Replacing <<%s>> with <<%s>>" % (text, newtext))
  comment = "Add phon= to ru-IPA templates"
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = newtext
  page.save(comment=comment)
def process_page(page, index):
    """Log old-vs-new fr-pron module output for each {{fr-IPA}} respelling.

    For every {{fr-IPA}} on the page, each numbered argument from 1 up to
    the highest non-empty one (an empty one falls back to the page title)
    is expanded through {{#invoke:fr-pron|show|...|check_new_module=1}}.
    Output containing " || " is treated as "old || new" and logged as a
    warning; anything else is logged as identical in both modules.
    """
    global args
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

    parsed = blib.parse(page)

    for t in parsed.filter_templates():
        if tname(t) != "fr-IPA":
            continue
        posval = getparam(t, "pos")
        pos_arg = "|pos=%s" % posval if posval else ""
        # Highest numbered argument (2..29) with a value; 1 if there is none.
        populated = [n for n in xrange(2, 30) if getparam(t, str(n))]
        max_arg = populated[-1] if populated else 1
        for pronarg in xrange(1, max_arg + 1):
            pronval = getparam(t, str(pronarg)) or pagetitle
            pron = expand_text(
                "{{#invoke:fr-pron|show|%s%s|check_new_module=1}}" %
                (pronval, pos_arg))
            if " || " not in pron:
                pagemsg("{{fr-IPA|%s%s}} == %s in both old and new" %
                        (pronval, pos_arg, pron))
                continue
            pronold, pronnew = pron.split(" || ")
            pagemsg(
                "WARNING: {{fr-IPA|%s%s}} == %s in old but %s in new" %
                (pronval, pos_arg, pronold, pronnew))
def process_text_on_page(index, pagetitle, text):
    """Log the rhymes categories produced by rhymes templates in `text`.

    Template names come from args.rhymes_templates; args.include_langs and
    args.skip_langs (all comma-separated, UTF-8 encoded) optionally filter
    on the template's 1= language code. Each selected template is expanded
    and every [[Category:Rhymes:...]] link in the expansion is logged.
    """
    global args

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

    def optional_codes(raw):
        # An unset option means "no filter".
        return raw.decode("utf-8").split(",") if raw else []

    notes = []

    parsed = blib.parse_text(text)
    rhymes_templates = args.rhymes_templates.decode("utf-8").split(",")
    skip_lang_codes = optional_codes(args.skip_langs)
    include_lang_codes = optional_codes(args.include_langs)
    for t in parsed.filter_templates():
        if tname(t) not in rhymes_templates:
            continue
        langcode = getparam(t, "1")
        if include_lang_codes and langcode not in include_lang_codes:
            continue
        if skip_lang_codes and langcode in skip_lang_codes:
            continue
        expanded = expand_text(unicode(t))
        if not expanded:
            continue
        categories = re.findall(r"\[\[Category:Rhymes:.*?\]\]", expanded)
        for cattext in categories:
            # Strip the surrounding [[ and ]].
            pagemsg("Found rhymes category: %s" % cattext[2:-2])
Example #30
0
def process_page(page, index, parsed):
    """Move alt1=/alt2=/altnomsg= to 1=/2=/nomsg= in {{ang-decl-*}} templates.

    Returns a tuple of the re-serialised page text and the list of change
    notes (one note per modified template).
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    # (old parameter, parameter it is moved to), in processing order.
    renames = (("alt1", "1"), ("alt2", "2"), ("altnomsg", "nomsg"))
    notes = []

    for t in parsed.filter_templates():
        if not tname(t).startswith("ang-decl-"):
            continue
        before_text = unicode(t)
        for oldname, newname in renames:
            value = getparam(t, oldname)
            if value:
                # Add the new parameter (requested ahead of the old one),
                # then remove the old one.
                t.add(newname, value, before=oldname)
                rmparam(t, oldname)
        after_text = unicode(t)
        if after_text != before_text:
            pagemsg("Replaced %s with %s" % (before_text, after_text))
            notes.append("move alt param to main param in {{ang-decl-*}}")

    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Swap 1= and 2= in {{ru-conj}}/{{ru-conj-old}} when 2= is a verb type.

    The page is re-parsed with ``blib.parse`` (the ``parsed`` argument is
    overwritten). Returns a tuple of the new page text and the change notes.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    conj_templates = ("ru-conj", "ru-conj-old")
    # Values of 2= that identify it as holding the verb type.
    verb_types = (
        "pf", "pf-intr", "pf-refl", "pf-impers", "pf-intr-impers",
        "pf-refl-impers", "impf", "impf-intr", "impf-refl",
        "impf-impers", "impf-intr-impers", "impf-refl-impers",
    )

    # Not used below.
    text = unicode(page.text)
    notes = []

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        before_text = unicode(t)
        if unicode(t.name) in conj_templates:
            verbtype = getparam(t, "2")
            if verbtype in verb_types:
                # Read 1= before overwriting it so the two values can swap.
                conjtype = getparam(t, "1")
                t.add("2", conjtype)
                t.add("1", verbtype)
                notes.append("move verb type from arg 2 to arg 1")
        after_text = unicode(t)
        if before_text != after_text:
            pagemsg("Replaced %s with %s" % (before_text, after_text))

    return unicode(parsed), notes
Example #32
0
def process_page_for_fix(page, index, parsed):
  """Convert raw links to {{l|kmr|...}} and retag ku templates as kmr.

  Every ``[[...]]`` in the page text is first rewritten to
  ``{{l|kmr|...}}``; the result is then parsed and {{l}} / {{rhymes nav}}
  templates with 1=ku get 1=kmr. Any other template with 1=ku only triggers
  a warning. Returns a tuple of the new text and the change notes.
  """
  pagename = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagename, txt))
  def errandpagemsg(txt):
    errandmsg("Page %s %s: %s" % (index, pagename, txt))

  pagemsg("Processing")

  notes = []
  text = unicode(page.text)

  linked_text = re.sub(r"\[\[(.*?)\]\]", r"{{l|kmr|\1}}", text)
  if linked_text != text:
    notes.append("convert raw links to {{l|kmr|...}}")
    text = linked_text

  parsed = blib.parse_text(text)
  for t in parsed.filter_templates():
    before_text = unicode(t)
    tn = tname(t)
    if getparam(t, "1") == "ku":
      if tn in ["l", "rhymes nav"]:
        t.add("1", "kmr")
        notes.append("convert {{%s|ku}} to {{%s|kmr}}" % (tn, tn))
      else:
        pagemsg("WARNING: Kurdish-language template of unrecognized name: %s" % unicode(t))
    after_text = unicode(t)
    if before_text != after_text:
      pagemsg("Replaced %s with %s" % (before_text, after_text))

  return unicode(parsed), notes
 def hack_templates(parsed, subsectitle):
     """Replace nocat= with termlang=mul in lang=en quote templates.

     For each template named in ``quote_templates`` that has a non-empty
     nocat= and lang=en, all parameters are removed and re-added as
     lang=en, termlang=mul, then the remaining original parameters (minus
     nocat) in their original order. ``subsectitle`` is not used here.
     """
     for t in parsed.filter_templates():
         origt = unicode(t)
         if tname(t) not in quote_templates:
             continue
         if not getparam(t, "nocat") or getparam(t, "lang").strip() != "en":
             continue
         notes.append(
             "convert nocat=1 in lang=en Translingual section to termlang=mul"
         )
         # Snapshot every parameter except nocat before wiping the template.
         kept = [
             (unicode(p.name), p.value, p.showkey)
             for p in t.params
             if unicode(p.name).strip() != "nocat"
         ]
         del t.params[:]
         # When the template name contains a newline, end the two leading
         # parameters with one as well.
         suffix = "\n" if "\n" in unicode(t.name) else ""
         for key, val in (("lang", "en"), ("termlang", "mul")):
             t.add(key, val + suffix, preserve_spacing=False)
         # Re-add the remaining parameters in their original order.
         for pname, pvalue, pshowkey in kept:
             t.add(pname, pvalue, showkey=pshowkey, preserve_spacing=False)
         pagemsg("Replaced <%s> with <%s>" % (origt, unicode(t)))
def process_page(index, page):
  """Warn when a Russian ===Adjective=== section lacks a headword template.

  The page is split on level-2 headers. In the Russian section the
  templates are scanned for {{ru-adj}} or {{head|ru|adjective form}}; if
  neither occurs but "===Adjective===" does, a warning is logged. A page
  with more than one Russian section is skipped with a warning.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def is_adj_headword(t):
    name = unicode(t.name)
    if name == "ru-adj":
      return True
    return (name == "head" and getparam(t, "1") == "ru" and
        getparam(t, "2") == "adjective form")

  pagemsg("Processing")

  text = unicode(page.text)

  # Odd indices hold the captured headers, even indices the section bodies.
  sections = re.split("(^==[^=]*==\n)", text, 0, re.M)
  seen_russian = False

  for j in xrange(2, len(sections), 2):
    if sections[j-1] != "==Russian==\n":
      continue
    if seen_russian:
      pagemsg("WARNING: Found multiple Russian sections, skipping page")
      return
    seen_russian = True

    body = sections[j]
    templates = blib.parse_text(body).filter_templates()
    has_headword = any(is_adj_headword(t) for t in templates)
    if "===Adjective===" in body and not has_headword:
      pagemsg("WARNING: Missing adj headword template")
def process_page(page, index, parsed):
  """Fold suffix=ся into the lemma of {{ru-decl-adj}}/{{ru-adj-old}}.

  Only pages whose title ends in "ся" are handled; for other pages the
  function returns None. Otherwise "ся" is appended to each comma-separated
  alternant in 1=, suffix= is removed, and a tuple of the new page text and
  the change notes is returned. The page is re-parsed with ``blib.parse``.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  if not pagetitle.endswith(u"ся"):
    return

  adj_templates = ["ru-decl-adj", "ru-adj-old"]

  # Not used below.
  text = unicode(page.text)
  notes = []

  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    before_text = unicode(t)
    if unicode(t.name) in adj_templates and getparam(t, "suffix") == u"ся":
      # Suffix every alternant before a comma, then the final one.
      with_inner_suffix = getparam(t, "1").replace(",", u"ся,")
      lemma = re.sub("$", u"ся", with_inner_suffix)
      t.add("1", lemma)
      rmparam(t, "suffix")
      notes.append(u"move suffix=ся to lemma")
    after_text = unicode(t)
    if before_text != after_text:
      pagemsg("Replaced %s with %s" % (before_text, after_text))

  return unicode(parsed), notes
Example #36
0
def process_page(index, page, save, verbose):
  """Log explicit perfective past passive participles in ru-conj templates.

  For every {{ru-conj}} / {{ru-conj-old}} template whose 1= starts with
  "pf", the template call is rewritten into a {{ru-generate-verb-forms}}
  call and expanded; if expansion fails the template is skipped with a
  warning. The template's own past_pasv_part*/ppp* parameters are then
  logged (with any "//..." part removed).

  ``save`` is accepted but not used here; ``verbose`` is forwarded to
  ``blib.expand_text``. The function only logs and returns None.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

  pagemsg("Processing")

  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    # Bind the template name locally. The original compared the unrelated
    # name `tname` against "ru-conj", which never matched, so {{ru-conj}}
    # calls were never rewritten to {{ru-generate-verb-forms}}.
    tn = unicode(t.name)
    if tn in ["ru-conj", "ru-conj-old"] and getparam(t, "1").startswith("pf"):
      if tn == "ru-conj":
        tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms", unicode(t))
      else:
        tempcall = re.sub(r"\{\{ru-conj-old", "{{ru-generate-verb-forms|old=y", unicode(t))
      result = expand_text(tempcall)
      if not result:
        pagemsg("WARNING: Error generating forms, skipping")
        continue
      # NOTE(review): the split result is not consulted below; the loop
      # reads the participle overrides from the template itself. Confirm
      # whether the generated forms were meant to be inspected instead.
      args = rulib.split_generate_args(result)
      for base in ["past_pasv_part", "ppp"]:
        for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
          val = getparam(t, base + i)
          if val and val != "-":
            # Drop everything from "//" onwards before logging.
            val = re.sub("//.*", "", val)
            pagemsg("Found perfective past passive participle: %s" % val)
def process_text_on_page(index, pagename, text):
    """Normalise {{RQ:Buk Baibel}} calls in `text`.

    1= is replaced by its ``book_map`` entry when it has one, and a
    non-empty 4= is renamed to passage=. Returns a tuple of the new text and
    the list of change notes.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")

    notes = []
    parsed = blib.parse_text(text)

    for t in parsed.filter_templates():
        tn = tname(t)
        if tn != "RQ:Buk Baibel":
            continue
        before_text = unicode(t)

        book = getparam(t, "1")
        if book in book_map:
            mapped_book = book_map[book]
            t.add("1", mapped_book)
            notes.append("convert '%s' to '%s' in 1= in {{%s}}" %
                         (book, mapped_book, tn))

        passage = getparam(t, "4")
        if passage:
            # Add passage= (requested ahead of 4=), then remove 4=.
            t.add("passage", passage, before="4")
            rmparam(t, "4")
            notes.append("4= -> passage= in {{%s}}" % tn)

        after_text = unicode(t)
        if after_text != before_text:
            pagemsg("Replaced %s with %s" % (before_text, after_text))

    return unicode(parsed), notes
def process_text_on_page(index, pagename, text):
    """Replace {{head|la|POS}} with the POS-specific template.

    The replacement template name comes from ``pos_to_template``. Templates
    with an unrecognized part of speech, or with 3= or head= set, are left
    alone with a warning. Converted templates get 1=<pagename>, lose 2=, and
    receive FIXME=1. Returns a tuple of the new text and the change notes.
    """
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagename, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagename, txt))

    pagemsg("Processing")

    notes = []
    parsed = blib.parse_text(text)

    for t in parsed.filter_templates():
        if tname(t) != "head" or getparam(t, "1") != "la":
            continue
        pos = getparam(t, "2")
        if pos not in pos_to_template:
            pagemsg("WARNING: Saw unrecognized part of speech %s: %s" %
                    (pos, unicode(t)))
            continue
        if getparam(t, "3") or getparam(t, "head"):
            pagemsg("WARNING: Saw 3= or head=: %s" % unicode(t))
            continue

        before_text = unicode(t)
        t.add("1", pagename)
        blib.set_template_name(t, pos_to_template[pos])
        rmparam(t, "2")
        t.add("FIXME", "1")
        pagemsg("Replaced %s with %s" % (before_text, unicode(t)))
        # tname(t) is read after the rename, so the note shows the new name.
        notes.append("replace {{head|la|%s}} with {{%s}}" %
                     (pos, tname(t)))

    return unicode(parsed), notes
def process_page(page, index, parsed):
    """Update language codes and drop sort= in {{rfdef}} templates.

    A 1= found in ``langs_to_convert`` is replaced by its mapped code. If
    the resulting code is in ``langs_to_remove_sort`` and the template has
    sort=, that parameter is removed. Templates with lang= set are skipped
    with a warning. Returns a tuple of the new page text and the notes.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")
    notes = []

    for t in parsed.filter_templates():
        before_text = unicode(t)
        if tname(t) != "rfdef":
            continue
        if getparam(t, "lang"):
            pagemsg("WARNING: has lang=, skipping: %s" % unicode(t))
            continue

        lang = getparam(t, "1")
        if lang in langs_to_convert:
            oldlang, lang = lang, langs_to_convert[lang]
            t.add("1", lang)
            notes.append("convert {{rfdef|%s}} to {{rfdef|%s}}" %
                         (oldlang, lang))

        # Uses the converted code when a conversion happened.
        if lang in langs_to_remove_sort and t.has("sort"):
            rmparam(t, "sort")
            notes.append(
                "remove sort= from {{rfdef|%s}}, now auto-computed" %
                lang)

        after_text = unicode(t)
        if after_text != before_text:
            pagemsg("Replaced <%s> with <%s>" % (before_text, after_text))

    return unicode(parsed), notes
Example #40
0
def process_page(index, page, save, verbose):
  """Tally the Russian headword templates used on a page.

  Each template in ``ru_head_templates`` and each {{head|ru|TYPE}} is
  counted in both ``cat_head_count`` and ``overall_head_count``. A warning
  is logged for head types in ``ru_heads_to_warn_about`` and for pages with
  no headword template at all. ``output_heads_seen`` is called whenever
  ``index`` is a multiple of 100. ``save`` and ``verbose`` are not used.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)
  page_has_head = False
  for t in parsed.filter_templates():
    name = unicode(t.name)
    # Key under which this template is counted; None if it is not a head.
    headname = None
    if name in ru_head_templates:
      headname = name
    elif name == "head" and getparam(t, "1") == "ru":
      headtype = getparam(t, "2")
      headname = "head|ru|%s" % headtype
      if headtype in ru_heads_to_warn_about:
        pagemsg("WARNING: Found %s" % headname)
    if headname is None:
      continue
    for counter in (cat_head_count, overall_head_count):
      counter[headname] = counter.get(headname, 0) + 1
    page_has_head = True

  if not page_has_head:
    pagemsg("WARNING: No head")
  if index % 100 == 0:
    output_heads_seen()
Example #41
0
 def fix_cite_book_params(t):
   """Normalise year, date, month, id and page parameters of template `t`.

   When both origyear= and year= are non-blank, year= is renamed to
   year_published= and origyear= to year= (unless year_published= already
   has a value, which only produces a warning). origdate=/origmonth= are
   moved to date=/month=, id= is moved to isbn= through ``frob_isbn``, and
   ``fix_page_params`` is applied. Returns True if the template text
   changed.
   """
   origt = unicode(t)

   def frob_isbn(idval):
     # Strip a leading ISBN label; pass through values starting with a
     # digit; otherwise warn and return None.
     isbn_re = r"^(\s*)(10-ISBN +|ISBN-13 +|ISBN:? +|ISBN[-=] *)"
     if re.search(isbn_re, idval, re.I):
       return re.sub(isbn_re, r"\1", idval, 0, re.I)
     if re.search(r"^[0-9]", idval.strip()):
       return idval
     pagemsg("WARNING: Would replace id= -> isbn= but id=%s doesn't begin with 'ISBN '" %
         idval.replace("\n", r"\n"))
     return None

   both_years = getparam(t, "origyear").strip() and getparam(t, "year").strip()
   if both_years and getparam(t, "year_published"):
     pagemsg("WARNING: Would set year_published= but is already present: %s"
         % unicode(t))
   elif both_years:
     rmparam(t, "year_published") # in case of blank param
     # Rename year= first so that origyear= can then take over its name.
     t.get("year").name = "year_published"
     t.get("origyear").name = "year"
     pagemsg("year -> year_published, origyear -> year")

   move_param(t, "origdate", "date")
   move_param(t, "origmonth", "month")
   move_param(t, "id", "isbn", frob_isbn)
   fix_page_params(t)
   return origt != unicode(t)
Example #42
0
def process_page(index, page, save, verbose):
    """Log explicit perfective past passive participles in ru-conj templates.

    For every {{ru-conj}} / {{ru-conj-old}} template whose 1= starts with
    "pf", the template call is rewritten into a {{ru-generate-verb-forms}}
    call and expanded; if expansion fails the template is skipped with a
    warning. The template's own past_pasv_part*/ppp* parameters are then
    logged (with any "//..." part removed).

    ``save`` is accepted but not used here; ``verbose`` is forwarded to
    ``blib.expand_text``. The function only logs and returns None.
    """
    pagetitle = unicode(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        # Bind the template name locally. The original compared the
        # unrelated name `tname` against "ru-conj", which never matched, so
        # {{ru-conj}} calls were never rewritten to
        # {{ru-generate-verb-forms}}.
        tn = unicode(t.name)
        if tn in ["ru-conj", "ru-conj-old"] and getparam(
                t, "1").startswith("pf"):
            if tn == "ru-conj":
                tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms",
                                  unicode(t))
            else:
                tempcall = re.sub(r"\{\{ru-conj-old",
                                  "{{ru-generate-verb-forms|old=y", unicode(t))
            result = expand_text(tempcall)
            if not result:
                pagemsg("WARNING: Error generating forms, skipping")
                continue
            # NOTE(review): the split result is not consulted below; the
            # loop reads the participle overrides from the template itself.
            # Confirm whether the generated forms were meant to be used.
            args = blib.split_generate_args(result)
            for base in ["past_pasv_part", "ppp"]:
                for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
                    val = getparam(t, base + i)
                    if val and val != "-":
                        # Drop everything from "//" onwards before logging.
                        val = re.sub("//.*", "", val)
                        pagemsg(
                            "Found perfective past passive participle: %s" %
                            val)
Example #43
0
def process_page(templates, index, page, save=False, verbose=False):
  """Move the plural stem one positional argument earlier.

  For each template named in ``templates``, the plural stem is moved from
  arg 5 to arg 4 when ``arg1_is_stress`` accepts 1=, otherwise from arg 4
  to arg 3. Templates containing a positional "or", "-", "_" or "join:..."
  value are skipped with a warning, as are templates where the target
  position is already filled. The page is saved only when ``save`` is
  true; otherwise the intended save is just logged. ``verbose`` is unused.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def punt_warning(t):
    # Warning format to emit when `t` can't be handled, else None. Only
    # params with showkey false are inspected, in order.
    for param in t.params:
      if param.showkey:
        continue
      val = unicode(param.value)
      if val == "or":
        return "WARNING: Can't handle multi-decl templates: %s"
      if val in ("-", "_") or val.startswith("join:"):
        return "WARNING: Can't handle multi-word templates: %s"
    return None

  if not page.exists():
    pagemsg("WARNING: Page doesn't exist")
    return

  parsed = blib.parse(page)
  changed = False

  for t in parsed.filter_templates():
    if unicode(t.name) not in templates:
      continue
    origt = unicode(t)

    warning = punt_warning(t)
    if warning:
      pagemsg(warning % unicode(t))
      continue

    if arg1_is_stress(getparam(t, "1")):
      oldplarg, newplarg = "5", "4"
    else:
      oldplarg, newplarg = "4", "3"

    plstem = getparam(t, oldplarg)
    if not plstem:
      continue
    if getparam(t, newplarg):
      pagemsg("WARNING: Something wrong, found args in both positions %s and %s: %s" %
          (newplarg, oldplarg, unicode(t)))
      continue

    rmparam(t, oldplarg)
    t.add(newplarg, plstem)
    changed = True
    pagemsg("Replacing %s with %s" % (origt, unicode(t)))

  if not changed:
    return

  comment = "Move plstem from 5th/4th argument to 4th/3rd"
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = unicode(parsed)
  page.save(comment=comment)
def process_template(pagetitle, index, template, ruparam, trparam, output_line,
    find_accents, verbose):
  """Auto-accent the Russian text in one parameter of `template`.

  Parameters:
    pagetitle, index: used to prefix log messages.
    template: the template object to inspect and possibly modify.
    ruparam: name of the parameter holding the text to accent; the literal
      "page title" means the page title is used as the text; a two-element
      list gives (parameter to read, parameter to save into).
    trparam: name of the transliteration parameter, or a false value if
      the template has none.
    output_line: callback invoked with a short status string ("Need
      accents", "Need accents (changed)" or "Found accents").
    find_accents: when false, no lookup is attempted and only the "Need
      accents" check is performed.
    verbose: forwarded to find_accented().

  Returns False if the template was not changed (including when it is
  {{head}}), otherwise a one-element list with an "auto-accent ..." note.
  """
  origt = unicode(template)
  saveparam = ruparam
  def pagemsg(text):
    msg("Page %s %s: %s" % (index, pagetitle, text))
  # NOTE(review): semi_verbose is not a parameter or local of this function;
  # presumably a module-level setting -- confirm.
  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, semi_verbose)
  if semi_verbose:
    pagemsg("Processing template: %s" % unicode(template))
  if unicode(template.name) == "head":
    # Skip {{head}}. We don't want to mess with headwords.
    return False
  # A list means (param to read from, param to write to).
  if isinstance(ruparam, list):
    ruparam, saveparam = ruparam
  if ruparam == "page title":
    val = pagetitle
  else:
    val = getparam(template, ruparam)
  valtr = getparam(template, trparam) if trparam else ""
  changed = False
  if find_accents:
    newval, newtr = find_accented(val, valtr, verbose, pagemsg, expand_text,
        origt)
    if newval != val or newtr != valtr:
      # Refuse the change if the lookup result differs from the original
      # once accents are removed from both.
      if ru.remove_accents(newval) != ru.remove_accents(val):
        pagemsg("WARNING: Accented page %s changed from %s in more than just accents, not changing" % (newval, val))
      else:
        changed = True
        addparam(template, saveparam, newval)
        if newtr:
          if not trparam:
            pagemsg("WARNING: Unable to change translit to %s because no translit param available (Cyrillic param %s): %s" %
                (newtr, saveparam, origt))
          elif unicode(template.name) in ["ru-ux"]:
            # Transliteration is never written into {{ru-ux}}.
            pagemsg("WARNING: Not changing or adding translit param %s=%s to ru-ux: origt=%s" % (
              trparam, newtr, origt))
          else:
            if valtr and valtr != newtr:
              pagemsg("WARNING: Changed translit param %s from %s to %s: origt=%s" %
                  (trparam, valtr, newtr, origt))
            if not valtr:
              pagemsg("NOTE: Added translit param %s=%s to template: origt=%s" %
                  (trparam, newtr, origt))
            addparam(template, trparam, newtr)
        elif valtr:
          pagemsg("WARNING: Template has translit %s but lookup result has none, leaving translit alone: origt=%s" %
              (valtr, origt))
        if check_need_accent(newval):
          output_line("Need accents (changed)")
        else:
          output_line("Found accents")
  if not changed and check_need_accent(val):
    output_line("Need accents")
  if changed:
    pagemsg("Replaced %s with %s" % (origt, unicode(template)))
  # newval/newtr are only bound when find_accents is true, which is also the
  # only way `changed` can become True.
  return ["auto-accent %s%s" % (newval, "//%s" % newtr if newtr else "")] if changed else False
def process_page(index, page, save, verbose):
  """Disabled script: move {{ru-conj}} type 6a arg6 -> arg4 and 7b arg7 -> arg6.

  The function currently logs a warning and returns immediately, so
  everything after the first `return` is unreachable.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable from here on (see the early return above). ----
  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    origt = unicode(t)
    if unicode(t.name) in ["ru-conj"]:
      conjtype = getparam(t, "1")
      if conjtype.startswith("6a"):
        param6 = getparam(t, "6")
        if param6:
          rmparam(t, "6")
          # Also drop arg 5 if it is blank.
          if not getparam(t, "5"):
            rmparam(t, "5")
          # Make sure args 1-3 exist (blank if missing) before setting arg 4.
          for i in xrange(1, 4):
            if not t.has(str(i)):
              t.add(str(i), "")
          t.add("4", param6)
          notes.append("move type 6a arg6 -> arg4")
      if conjtype.startswith("7b"):
        param7 = getparam(t, "7")
        if param7:
          rmparam(t, "7")
          # Make sure args 1-5 exist (blank if missing) before setting arg 6.
          for i in xrange(1, 6):
            if not t.has(str(i)):
              t.add(str(i), "")
          t.add("6", param7)
          notes.append("move type 7b arg7 -> arg6")
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    # Every text change is expected to have recorded at least one note.
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)
Example #46
0
def process_page(index, page, save, verbose):
  """Disabled script: check past-tense overrides in {{ru-conj-7a}}/{{ru-conj-7b}}.

  The function currently logs a warning and returns immediately, so
  everything after the first `return` is unreachable.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable from here on (see the early return above). ----
  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    origt = unicode(t)
    if unicode(t.name) in ["ru-conj-7a", "ru-conj-7b"]:
      past_stem = getparam(t, "4")
      # Does the past stem end in one of the listed vowels or an accent mark?
      vowel_end = re.search(u"[аэыоуяеиёю́]$", past_stem)
      past_m = getparam(t, "past_m")
      past_f = getparam(t, "past_f")
      past_n = getparam(t, "past_n")
      past_pl = getparam(t, "past_pl")
      if past_m or past_f or past_n or past_pl:
        # Build the forms the overrides are compared against: the stem
        # itself (plus "л" after a vowel) for the masculine, and the
        # unstressed stem plus an accented ending for the other forms.
        upast_stem = ru.make_unstressed(past_stem)
        expected_past_m = past_stem + (u"л" if vowel_end else "")
        expected_past_f = upast_stem + u"ла́"
        expected_past_n = upast_stem + u"ло́"
        expected_past_pl = upast_stem + u"ли́"
        # past_m may be absent; the other three overrides must all match.
        if ((not past_m or expected_past_m == past_m) and
            expected_past_f == past_f and
            expected_past_n == past_n and
            expected_past_pl == past_pl):
          msg("Would remove past overrides and add arg5=b")
        else:
          msg("WARNING: Remaining past overrides: past_m=%s, past_f=%s, past_n=%s, past_pl=%s, expected_past_m=%s, expected_past_f=%s, expected_past_n=%s, expected_past_pl=%s" %
              (past_m, past_f, past_n, past_pl, expected_past_m, expected_past_f, expected_past_n, expected_past_pl))
    # Nothing above modifies the template; it only reports.
    newt = unicode(t)
    if origt != newt:
      pagemsg("Replaced %s with %s" % (origt, newt))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    # Every text change is expected to have recorded at least one note.
    assert notes
    comment = "; ".join(notes)
    if save:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)
    else:
      pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose):
  """Remove g=/g2=/g3=/g4= from {{head|ru|adjective form}} templates.

  Only the ==Russian== section is touched. Pages with a colon in the title
  or with more than one Russian section are skipped with a warning. When
  the text changed, the page is saved if ``save`` is true; otherwise the
  intended save is only logged.
  """
  pagetitle = unicode(page.title())
  # Not used below.
  subpagetitle = re.sub("^.*:", "", pagetitle)
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def is_adj_form_head(t):
    return (unicode(t.name) == "head" and getparam(t, "1") == "ru" and
        getparam(t, "2") == "adjective form")

  pagemsg("Processing")

  if ":" in pagetitle:
    pagemsg("WARNING: Colon in page title, skipping page")
    return

  text = unicode(page.text)
  notes = []

  # Odd indices hold the captured headers, even indices the section bodies.
  sections = re.split("(^==[^=]*==\n)", text, 0, re.M)
  seen_russian = False

  for j in xrange(2, len(sections), 2):
    if sections[j-1] != "==Russian==\n":
      continue
    if seen_russian:
      pagemsg("WARNING: Found multiple Russian sections, skipping page")
      return
    seen_russian = True

    parsed = blib.parse_text(sections[j])
    for t in parsed.filter_templates():
      if not is_adj_form_head(t):
        continue
      before_text = unicode(t)
      for gender_param in ("g", "g2", "g3", "g4"):
        rmparam(t, gender_param)
      after_text = unicode(t)
      if before_text != after_text:
        pagemsg("Replaced %s with %s" % (before_text, after_text))
        notes.append("remove gender from adjective forms")
    sections[j] = unicode(parsed)

  new_text = "".join(sections)
  if new_text == text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(blib.group_notes(notes))
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Example #48
0
def process_page(index, page, save, verbose):
  """Fold past_m= into positional args of class-8 {{ru-conj}} templates.

  Applies to {{ru-conj}} whose 2= starts with "8a" or "8b". past_m= is
  removed and then either dropped (equal to arg 3), written to arg 3 (2=
  starts with "8b" but not "8b/" and arg 3 is the unstressed past_m), or
  written to arg 5. Templates with any "or" parameter value are skipped
  with a warning. When the text changed, the page is saved if ``save`` is
  true; otherwise the intended save is only logged.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []

  for t in parsed.filter_templates():
    before_text = unicode(t)
    conjtype = getparam(t, "2")
    if unicode(t.name) == "ru-conj" and re.search(r"^8[ab]", conjtype):
      if any(unicode(p.value) == "or" for p in t.params):
        pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
        continue
      past_m = getparam(t, "past_m")
      if past_m:
        rmparam(t, "past_m")
        stem = getparam(t, "3")
        plain_8b = conjtype.startswith("8b") and not conjtype.startswith("8b/")
        if stem == past_m:
          pagemsg("Stem %s and past_m same" % stem)
          notes.append("remove redundant past_m %s" % past_m)
        elif plain_8b and ru.make_unstressed(past_m) == stem:
          pagemsg("Class 8b/b and stem %s is unstressed version of past_m %s, replacing stem with past_m" % (
            stem, past_m))
          t.add("3", past_m)
          notes.append("moving past_m %s to arg 3" % past_m)
        else:
          pagemsg("Stem %s and past_m %s are different, putting past_m in param 5" % (
            stem, past_m))
          t.add("5", past_m)
          notes.append("moving past_m %s to arg 5" % past_m)
    after_text = unicode(t)
    if before_text != after_text:
      pagemsg("Replaced %s with %s" % (before_text, after_text))

  new_text = unicode(parsed)
  if new_text == text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
Example #49
0
def vocalize_param(pagetitle, index, template, param, paramtr):
  """Vocalize the text in `param` of `template` using the Latin in `paramtr`.

  Returns False when `param` is empty. Returns the vocalized text when
  ``do_vocalize_param`` produced one (the template is updated and the
  replacement logged). Returns True in every other case.
  """
  arabic = getparam(template, param)
  latin = getparam(template, paramtr)
  if not arabic:
    return False
  if not latin:
    return True

  vocalized = do_vocalize_param(pagetitle, index, template, param, arabic, latin)
  if not vocalized:
    return True

  # Capture the template text before it is modified, for the log message.
  oldtempl = "%s" % unicode(template)
  addparam(template, param, vocalized)
  msg("Page %s %s: Replaced %s with %s" % (index, pagetitle,
    oldtempl, unicode(template)))
  return vocalized
Example #50
0
 def fix_page_params(t):
   """Clean up page=/pages= in template `t`.

   A leading "p." or "pp." is stripped from both parameters. Afterwards a
   purely numeric pages= is moved to page=, and a page= consisting of
   digits followed by a dash is moved to pages=. Returns True if the
   template text changed.
   """
   origt = unicode(t)
   for pname in ["page", "pages"]:
     current = getparam(t, pname)
     if not re.search(r"^\s*pp?\.\s*", current):
       continue
     # Keep the leading whitespace, drop the "p."/"pp." and what follows it.
     t.add(pname, re.sub(r"^(\s*)pp?\.\s*", r"\1", current))
     notes.append("remove p(p). from %s=" % pname)
     pagemsg("remove p(p). from %s=" % pname)

   pages_val = getparam(t, "pages").strip()
   if re.search(r"^[0-9]+$", pages_val):
     move_param(t, "pages", "page")
   # Read page= only now: the move above may just have populated it.
   page_val = getparam(t, "page").strip()
   if re.search(r"^[0-9]+[-–—]$", page_val):
     move_param(t, "page", "pages")
   return origt != unicode(t)
def process_page(index, page, save, verbose):
  """Remove the explicit audio-links category from pages with {{audio|lang=ru}}.

  If the page has an {{audio}} template with lang=ru, every occurrence of
  [[Category:Russian terms with audio links]] (with adjacent newlines) is
  replaced by a blank line. The page is saved when ``save`` is true;
  otherwise the intended save is only logged. ``verbose`` is unused.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)

  has_ru_audio = any(
    unicode(t.name) == "audio" and getparam(t, "lang") == "ru"
    for t in parsed.filter_templates())
  if not has_ru_audio:
    return

  new_text = re.sub(r"\n*\[\[Category:Russian terms with audio links]]\n*", "\n\n", text)
  if new_text == text:
    return

  comment = "Remove redundant [[:Category:Russian terms with audio links]]"
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = new_text
  page.save(comment=comment)
  def process_new_style_headword(htemp):
    """Split the numbered params of `htemp` into arg sets and process each.

    Arg sets are separated by a positional value of "or", "_", "-" or one
    starting with "join:". ``process_arg_set`` is called once per arg set
    (possibly with an empty list, in which case the lemma would default to
    the page name), including the final one.
    """
    numbered = [
      int(unicode(p.name)) for p in htemp.params
      if re.search("^[0-9]+$", unicode(p.name))
    ]
    highest_numbered_param = max([0] + numbered)

    # Index one past the last numbered param; reaching it flushes the final
    # arg set.
    sentinel = highest_numbered_param + 1

    arg_set = []
    for i in xrange(1, sentinel + 1):
      val = getparam(htemp, str(i))
      is_separator = (i == sentinel or val in ["or", "_", "-"] or
          re.search("^join:", val))
      if is_separator:
        process_arg_set(arg_set)
        arg_set = []
      else:
        arg_set.append(val)
Example #53
0
def process_page(index, page, save, verbose, direc):
    """Add inanimacy to neuter nouns in {{ru-noun+}}/{{ru-noun-table}}.

    NOTE(review): this function looks unfinished -- it contains a bare
    `FIXME` placeholder statement, the nested helper frob_gender_param is
    never called, and `fix_indeclinable` and `direc` are not defined or
    used within this function. Confirm before running.
    """
    pagetitle = unicode(page.title())
    # Not used below.
    subpagetitle = re.sub(".*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    notes = []
    text = unicode(page.text)
    parsed = blib.parse(page)

    # Rewrite a gender value: "n" -> "n-in", "n-p" -> "n-in-p".
    def frob_gender_param(t, param):
        val = getparam(t, param)
        if val == "n":
            t.add(param, "n-in")
        elif val == "n-p":
            t.add(param, "n-in-p")

    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun+", "ru-noun-table"]:
            origt = unicode(t)
            # Bail out of the whole page if the template has any parameter
            # other than 1=.
            for param in t.params:
                if unicode(param.name) != "1":
                    pagemsg("WARNING: Found other than a single param in template, skipping: %s" % unicode(t))
                    return
            # Placeholder left by the author: a bare name expression.
            FIXME
            if origt != unicode(t):
                param3 = getparam(t, "3")
                if param3 != "-":
                    if fix_indeclinable:
                        if param3:
                            pagemsg("WARNING: Can't make indeclinable, has genitive singular given: %s" % origt)
                            return
                        else:
                            t.add("3", "-")
                            notes.append("make indeclinable")
                            pagemsg("Making indeclinable: %s" % unicode(t))
                    else:
                        pagemsg("WARNING: Would add inanimacy to neuter, but isn't marked as indeclinable: %s" % origt)
                        return
                pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    new_text = unicode(parsed)

    if new_text != text:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        # The save comment lists the extra notes when there are any.
        if notes:
            comment = "Add inanimacy to neuters (%s)" % "; ".join(notes)
        else:
            comment = "Add inanimacy to neuters"
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)
Example #54
0
def infer_one_page_decls_1(page, index, text):
  """Convert adjective declension templates in `text` to the new form.

  `text` is a parsed object providing filter_templates(). For each name in
  `decl_templates`, every matching template is passed to infer_decl(). If
  inference yields arguments, the template's positional args 1-15 and
  short_* overrides are replaced by them; otherwise only stem and
  declension (args 1 and 2) are combined and trailing blank args removed.

  Returns (text, save comment), or (None, None) if compare_results()
  rejects a template rewritten via the fallback path.
  """
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
  for tempname in decl_templates:
    for t in text.filter_templates():
      if unicode(t.name).strip() == tempname:
        orig_template = unicode(t)
        args = infer_decl(t, pagemsg)
        if not args:
          # At least combine stem and declension, blanking decl when possible.
          stem, decl = combine_stem(getparam(t, "1"), getparam(t, "2"))
          t.add("1", stem)
          t.add("2", decl)
          # Remove any trailing blank arguments.
          # Walk from arg 15 down to 1, stopping at the first non-blank one.
          for i in xrange(15, 0, -1):
            if not getparam(t, i):
              rmparam(t, i)
            else:
              break
          new_template = unicode(t)
          if orig_template != new_template:
            if not compare_results(orig_template, new_template, pagemsg):
              return None, None
        else:
          # Wipe all positional args and short-form overrides, then rebuild
          # the template from the inferred args.
          for i in xrange(15, 0, -1):
            rmparam(t, i)
          rmparam(t, "short_m")
          rmparam(t, "short_f")
          rmparam(t, "short_n")
          rmparam(t, "short_p")
          t.name = tempname
          # Next positional index to assign to an arg without "=".
          i = 1
          for arg in args:
            if "=" in arg:
              name, value = re.split("=", arg)
              t.add(name, value)
            else:
              t.add(i, arg)
              i += 1
          new_template = unicode(t)
        if orig_template != new_template:
          # NOTE(review): `verbose` is not a parameter or local here;
          # presumably a module-level setting -- confirm.
          if verbose:
            pagemsg("Replacing %s with %s" % (orig_template, new_template))

  return text, "Convert adj decl to new form and infer short-accent pattern"
def process_page(index, page, save, verbose):
  """Disabled page processor for the class-6 verb conjugation rewrite.

  Logs a warning that the script no longer applies and returns at once.
  The statements after the `return` are unreachable; they are kept as a
  record of the rewrite: on ru-conj templates whose 1= starts with 6a or 6c
  and that carry no_iotation, drop no_iotation and mark the class with a
  degree sign; templates whose 1= starts with 6b are always marked.

  Args:
    index: Page index; used only in log messages.
    page: Page object providing title(), text and save().
    save: If true, the (unreachable) code would save the page.
    verbose: If true, the (unreachable) code would log the full text diff.
  """
  pagetitle = unicode(page.title())

  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable from here on; retained for reference. ----
  pagemsg("Processing")

  old_text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    conjtype = getparam(t, "1")
    if unicode(t.name) == "ru-conj":
      marked = re.sub("^6", u"6°", conjtype)
      if re.search(r"^6b", conjtype):
        notes.append(u"6b -> 6°b")
        t.add("1", marked)
      elif re.search(r"^6[ac]", conjtype) and getparam(t, "no_iotation"):
        rmparam(t, "no_iotation")
        notes.append(
            u"6a + no_iotation -> 6°a" if conjtype.startswith("6a")
            else u"6c + no_iotation -> 6°c")
        t.add("1", marked)
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == old_text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (old_text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
  else:
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
 def check_for_al(param):
   """Re-store headword param `param` after running it through remove_al.

   The param name is first passed through remove_links. If the headword
   template has no value for it, nothing happens. If the value contains
   '[', ']' or '|', that is logged, a note is added and the value is passed
   through remove_links before being stored with putp.
   """
   param = remove_links(param)
   value = getparam(headword_template, param)
   if not value:
     return
   # Any of these characters is taken as a sign of wiki-link markup.
   if any(ch in value for ch in ('[', ']', '|')):
     pagemsg("Param %s value %s has link in it" % (param, value))
     add_note("removed links from %s" % param)
     value = remove_links(value)
   putp(param, remove_al(value))
def process_page(index, page, save, verbose):
  """Replace blank participle params with "-" on class 7a/7b conjugations.

  Looks at ru-conj and ru-conj-old templates whose 2= is 7a or 7b. Templates
  with any param whose value is exactly "or" are skipped with a warning. On
  the rest, past_adv_part_short= and past_actv_part= are set to "-" when
  present but empty. Logs a warning if nothing was changed.

  Args:
    index: Page index; used only in log messages.
    page: Page object providing title(), text and save().
    save: If true, write the new text back and save; otherwise only log.
    verbose: If true, log the full before/after page text.
  """
  pagetitle = unicode(page.title())

  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  # (param name, note recorded when that param is filled in)
  blank_fixes = [
    ("past_adv_part_short", "set past_adv_part_short=-"),
    ("past_actv_part", "set past_actv_part=-"),
  ]
  for t in parsed.filter_templates():
    if unicode(t.name) not in ["ru-conj", "ru-conj-old"]:
      continue
    if getparam(t, "2") not in ["7a", "7b"]:
      continue
    if any(unicode(x.value) == "or" for x in t.params):
      pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
      continue
    for pname, note in blank_fixes:
      if t.has(pname) and getparam(t, pname) == "":
        notes.append(note)
        before = unicode(t)
        t.add(pname, "-")
        pagemsg("Replacing %s with %s" % (before, unicode(t)))

  new_text = unicode(parsed)

  if new_text != text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
      pagemsg("Would save with comment = %s" % comment)
    else:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)

  if not notes:
    pagemsg("WARNING: No changes")
 def get_form_class(k):
   """Return the verb form class found in etymology section `k`.

   Parses etymologies[k] and reads 1= from each ar-verb / ar-verb-form
   template. A warning is logged when two templates give different
   non-empty values. The value from the last such template is returned
   (even if it is empty), or None when there is no such template.
   """
   formclass = None
   # Index with the argument `k`; the section to inspect is the one the
   # caller asked for, not whatever an enclosing loop variable points at.
   parsed = blib.parse_text(etymologies[k])
   for t in parsed.filter_templates():
     if t.name in ["ar-verb", "ar-verb-form"]:
       newformclass = getparam(t, "1")
       if formclass and newformclass and formclass != newformclass:
         pagemsg("WARNING: Something wrong: Two different verb form classes in same etymology: %s != %s" % (formclass, newformclass))
       formclass = newformclass
   return formclass
Example #59
0
def process_page(index, page, save, verbose):
  """Convert head= to 1= on every ru-phrase template of a page.

  A warning is logged for any ru-phrase template that has tr=, and for any
  that has both head= and 1= (the latter are left unchanged). Otherwise
  head= is removed and its value added as 1=; a non-empty tr= is removed
  and added back after 1=.

  Args:
    index: Page index; used only in log messages.
    page: Page object providing title(), text and save().
    save: If true, write the new text back and save; otherwise only log.
    verbose: If true, log the full before/after page text.
  """
  pagetitle = unicode(page.title())

  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-phrase":
      continue
    if t.has("tr"):
      pagemsg("WARNING: Has tr=: %s" % unicode(t))
    if not t.has("head"):
      continue
    if t.has("1"):
      pagemsg("WARNING: Has both head= and 1=: %s" % unicode(t))
      continue

    notes.append("ru-phrase: convert head= to 1=")
    before = unicode(t)
    head = getparam(t, "head")
    rmparam(t, "head")
    # tr= is taken out and re-added after 1= (presumably to keep it last).
    tr = getparam(t, "tr")
    rmparam(t, "tr")
    t.add("1", head)
    if tr:
      t.add("tr", tr)
    pagemsg("Replacing %s with %s" % (before, unicode(t)))

  new_text = unicode(parsed)
  if new_text == text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
  else:
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
Example #60
0
 def fetch_numbered_params(t):
   """Return the values of params 1-9 of `t` without trailing blanks.

   Each param is read with getparam; a false value is stored as "". Blank
   entries at the end of the list are dropped, blank entries in the middle
   are kept. The result is empty when all nine are blank.
   """
   values = [getparam(t, str(num)) or "" for num in xrange(1, 10)]
   # Trim from the right until a non-blank value (or nothing) is left.
   while values and not values[-1]:
     values.pop()
   return values