def undo_one_page_greek_removal(page, index, text):
   """Put a removed template parameter back into the text of one page.

   The template is rebuilt from ``template_text`` and the parameter named by
   ``removed_param``; both are free variables here (presumably supplied by an
   enclosing scope that is not visible -- confirm against the caller).

   Returns a ``(newtext, changelog)`` pair; ``changelog`` is the empty string
   when nothing was replaced.
   """
   def pagemsg(txt):
     msg("Page %s %s: %s" % (index, unicode(page.title()), txt))

   tmpl = blib.parse_text(template_text).filter_templates()[0]
   # Three renderings of the same template: as given, without a polytonic
   # sc= (the replacement), and additionally without the removed param (the
   # form being searched for).
   orig_template = unicode(tmpl)
   if getparam(tmpl, "sc") == "polytonic":
     tmpl.remove("sc")
   to_template = unicode(tmpl)
   param_value = getparam(tmpl, removed_param)
   tmpl.remove(removed_param)
   from_template = unicode(tmpl)

   text = unicode(text)
   found_orig_template = orig_template in text
   newtext = text.replace(from_template, to_template)

   if newtext == text:
     # No substitution happened; say why and report an empty changelog.
     if found_orig_template:
       pagemsg("Original template found, taking no action")
     else:
       pagemsg("WARNING: Unable to locate 'from' template when undoing Greek param removal: %s"
           % from_template)
     return newtext, ""

   if found_orig_template:
     pagemsg("WARNING: Undid removal, but original template %s already present!" %
         orig_template)
   # A single replacement changes the length by exactly this much.
   expected_growth = len(to_template) - len(from_template)
   if len(newtext) - len(text) != expected_growth:
     pagemsg("WARNING: Length mismatch when undoing Greek param removal, may have matched multiple templates: from=%s, to=%s" % (
       from_template, to_template))
   changelog = "Undid removal of %s=%s in %s" % (removed_param,
       param_value, to_template)
   pagemsg("Change log = %s" % changelog)
   return newtext, changelog
def process_page(index, page):
  """Warn when a page's Russian section has an Adjective header but no
  adjective headword template ({{ru-adj}} or {{head|ru|adjective form}}).

  Pages with more than one ==Russian== section are skipped with a warning.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def is_adj_headword(t):
    name = unicode(t.name)
    if name == "ru-adj":
      return True
    return (name == "head" and getparam(t, "1") == "ru" and
        getparam(t, "2") == "adjective form")

  pagemsg("Processing")

  text = unicode(page.text)

  # re.split with a capturing group alternates body, header, body, ...;
  # odd indices hold the headers and even indices the section bodies.
  sections = re.split("(^==[^=]*==\n)", text, 0, re.M)
  seen_russian = False

  for j in xrange(2, len(sections), 2):
    if sections[j-1] != "==Russian==\n":
      continue
    if seen_russian:
      pagemsg("WARNING: Found multiple Russian sections, skipping page")
      return
    seen_russian = True

    body = sections[j]
    has_headword = False
    for t in blib.parse_text(body).filter_templates():
      if is_adj_headword(t):
        has_headword = True
    if "===Adjective===" in body and not has_headword:
      pagemsg("WARNING: Missing adj headword template")
Example #3
0
 def fix_cite_book_params(t):
   """Rename legacy cite-book style parameters on template ``t`` in place.

   Returns True when the template's text changed.
   """
   def frob_isbn(idval):
     # Strip a leading "ISBN"-style label, pass through bare values that
     # start with a digit, and otherwise warn and return None.
     isbn_re = r"^(\s*)(10-ISBN +|ISBN-13 +|ISBN:? +|ISBN[-=] *)"
     if re.search(isbn_re, idval, re.I):
       return re.sub(isbn_re, r"\1", idval, 0, re.I)
     if re.search(r"^[0-9]", idval.strip()):
       return idval
     pagemsg("WARNING: Would replace id= -> isbn= but id=%s doesn't begin with 'ISBN '" %
         idval.replace("\n", r"\n"))
     return None

   before = unicode(t)
   has_both_years = (getparam(t, "origyear").strip() and
       getparam(t, "year").strip())
   if has_both_years:
     if getparam(t, "year_published"):
       pagemsg("WARNING: Would set year_published= but is already present: %s"
           % unicode(t))
     else:
       rmparam(t, "year_published") # in case of blank param
       # Rename year first so that origyear can then take over the name.
       t.get("year").name = "year_published"
       t.get("origyear").name = "year"
       pagemsg("year -> year_published, origyear -> year")
   for old_name, new_name in [("origdate", "date"), ("origmonth", "month")]:
     move_param(t, old_name, new_name)
   move_param(t, "id", "isbn", frob_isbn)
   fix_page_params(t)
   return before != unicode(t)
Example #4
0
def process_page(index, page, save, verbose):
  """Report past passive participle overrides on perfective Russian verbs.

  For every {{ru-conj}} / {{ru-conj-old}} template whose first argument
  starts with "pf", the matching form-generation template is expanded and
  each non-empty, non-"-" past_pasv_part*/ppp* parameter is logged.

  ``save`` is not used in the visible part of this function.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

  pagemsg("Processing")

  parsed = blib.parse(page)
  for t in parsed.filter_templates():
    # Bind the template name once; the branch below reads it, and it was
    # previously never assigned in this function.
    tname = unicode(t.name)
    if tname in ["ru-conj", "ru-conj-old"] and getparam(t, "1").startswith("pf"):
      if tname == "ru-conj":
        tempcall = re.sub(r"\{\{ru-conj", "{{ru-generate-verb-forms", unicode(t))
      else:
        tempcall = re.sub(r"\{\{ru-conj-old", "{{ru-generate-verb-forms|old=y", unicode(t))
      result = expand_text(tempcall)
      if not result:
        pagemsg("WARNING: Error generating forms, skipping")
        continue
      # NOTE(review): args is not read in the visible code; kept in case the
      # call is relied on to validate the expansion -- confirm.
      args = rulib.split_generate_args(result)
      for base in ["past_pasv_part", "ppp"]:
        for i in ["", "2", "3", "4", "5", "6", "7", "8", "9"]:
          val = getparam(t, base + i)
          if val and val != "-":
            # Drop everything from "//" on before logging.
            val = re.sub("//.*", "", val)
            pagemsg("Found perfective past passive participle: %s" % val)
Example #5
0
 def do_one_page_verb(page, index, text):
   """Strip i3rab from the vn= values of every {{ar-conj}} template.

   ``text`` is parsed wikicode and is modified in place. A trailing "?" on
   vn= marks the value as uncertain and is kept on the rewritten value.

   Returns ``(text, changelog)``.
   """
   pagename = page.title()
   verbcount = 0
   verbids = []
   for template in text.filter_templates():
     if template.name == "ar-conj":
       verbcount += 1
       origvn = getparam(template, "vn")
       uncertain = origvn.endswith("?")
       if uncertain:
         vnvalue = origvn[:-1]
         msg("Page %s %s: Verbal noun(s) identified as uncertain" % (
           index, pagename))
       else:
         vnvalue = origvn
       if not vnvalue:
         continue
       # Values may be separated by either a Latin or an Arabic comma.
       vns = re.split(u"[,،]", vnvalue)
       form = getparam(template, "1")
       verbid = "#%s form %s" % (verbcount, form)
       if re.match("^[1I](-|$)", form):
         verbid += " (%s,%s)" % (getparam(template, "2"), getparam(template, "3"))
       no_i3rab_vns = [remove_i3rab(pagename, index, verbid, vn) for vn in vns]
       newvn = ",".join(no_i3rab_vns)
       if uncertain:
         newvn += "?"
       # Compare against the untouched original (including any "?"), so an
       # uncertain but otherwise unchanged value is not reported as a change.
       if newvn != origvn:
         msg("Page %s %s: Verb %s, replacing %s with %s" % (
           index, pagename, verbid, origvn, newvn))
         addparam(template, "vn", newvn)
         verbids.append(verbid)
   return text, "Remove i3rab from verbal nouns for verb(s) %s" % (
         ', '.join(verbids))
Example #6
0
def process_page(index, page, save, verbose):
  """Tally the Russian headword templates found on a page.

  Each head found increments ``cat_head_count`` and ``overall_head_count``
  (defined outside this function). A warning is logged for a page with no
  head at all, and ``output_heads_seen()`` is called on every 100th index.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  parsed = blib.parse(page)
  found_page_head = False
  for t in parsed.filter_templates():
    tname = unicode(t.name)
    headname = None
    if tname in ru_head_templates:
      headname = tname
    elif tname == "head" and getparam(t, "1") == "ru":
      headtype = getparam(t, "2")
      headname = "head|ru|%s" % headtype
      if headtype in ru_heads_to_warn_about:
        pagemsg("WARNING: Found %s" % headname)
    if headname is None:
      # Not a Russian headword template.
      continue
    for counts in (cat_head_count, overall_head_count):
      counts[headname] = counts.get(headname, 0) + 1
    found_page_head = True
  if not found_page_head:
    pagemsg("WARNING: No head")
  if index % 100 == 0:
    output_heads_seen()
Example #7
0
def canon_param(pagetitle, index, template, param, paramtr, translit_module,
    include_tempname_in_changelog=False):
  """Canonicalize one foreign-script parameter and its transliteration.

  ``param`` is either a single parameter name or a two-element list
  ``[fromparam, toparam]``; the special source name "page title" reads the
  value from ``pagetitle``. The template is updated in place from the results
  of ``do_canon_param``.

  Returns False when there is no foreign text, otherwise the ``actions``
  value returned by ``do_canon_param``.
  """
  if isinstance(param, list):
    fromparam, toparam = param
  else:
    fromparam = toparam = param

  if fromparam == "page title":
    foreign = pagetitle
  else:
    foreign = getparam(template, fromparam)
  latin = getparam(template, paramtr)
  if not foreign:
    return False

  canonforeign, canonlatin, actions = do_canon_param(pagetitle, index,
      template, fromparam, toparam, paramtr, foreign, latin, translit_module,
      include_tempname_in_changelog)
  before = "%s" % unicode(template)

  if canonforeign:
    addparam(template, toparam, canonforeign)
  # canonlatin == True means "remove the transliteration"; any other truthy
  # value is the replacement transliteration.
  if canonlatin == True:
    template.remove(paramtr)
  elif canonlatin:
    addparam(template, paramtr, canonlatin)

  if canonforeign or canonlatin:
    msg("Page %s %s: Replaced %s with %s" % (index, pagetitle,
      before, unicode(template)))
  return actions
Example #8
0
def process_page(index, page, save, verbose):
  """Move argument 1 of every {{ru-IPA}} template into phon=.

  Templates that already have a phon= value are left alone. The page is
  saved only when ``save`` is true; otherwise the would-be edit is logged.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  text = unicode(page.text)
  parsed = blib.parse(page)

  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-IPA":
      continue
    before = unicode(t)
    if getparam(t, "phon"):
      pagemsg("phon= already present: %s" % before)
      continue
    phon = getparam(t, "1")
    pagemsg("Adding phon=: %s" % before)
    rmparam(t, "1")
    t.add("phon", phon)
    pagemsg("Replaced %s with %s" % (before, unicode(t)))

  newtext = unicode(parsed)

  if newtext == text:
    pagemsg("Skipping")
    return

  if verbose:
    pagemsg("Replacing <<%s>> with <<%s>>" % (text, newtext))
  comment = "Add phon= to ru-IPA templates"
  if not save:
    pagemsg("Would save with comment = %s" % comment)
    return
  pagemsg("Saving with comment = %s" % comment)
  page.text = newtext
  page.save(comment=comment)
def process_page(index, page, save, verbose):
  """Warn about pages lacking {{head|ru|verb form}} or {{inflection of}}.

  When either template is missing, the "# ..." definition lines of the
  Russian section are collected and appended to the warning to give context.
  Pages with more than one ==Russian== section are skipped.

  ``save`` and ``verbose`` are accepted but not used here.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  found_inflection_of = False
  found_head_verb_form = False
  for t in parsed.filter_templates():
    if unicode(t.name) in ["inflection of"]:
      found_inflection_of = True
    if unicode(t.name) == "head" and getparam(t, "1") == "ru" and getparam(t, "2") == "verb form":
      found_head_verb_form = True

  # Default to empty so the warnings below still work on a page that has no
  # Russian section (deflines was previously unbound in that case).
  deflines = ""

  if not found_head_verb_form or not found_inflection_of:
    # Find definition line
    foundrussian = False
    sections = re.split("(^==[^=]*==\n)", text, 0, re.M)

    for j in xrange(2, len(sections), 2):
      if sections[j-1] == "==Russian==\n":
        if foundrussian:
          pagemsg("WARNING: Found multiple Russian sections, skipping page")
          return
        foundrussian = True

        # Joined with a literal backslash-n so the warning stays on one line.
        deflines = r"\n".join(re.findall(r"^(# .*)$", sections[j], re.M))

  if not found_head_verb_form:
    pagemsg("WARNING: No {{head|ru|verb form}}: %s" % deflines)
  if not found_inflection_of:
    pagemsg("WARNING: No 'inflection of': %s" % deflines)
def rewrite_one_page_verb_headword(page, index, text):
  """Turn form= of each {{ar-verb}} template into the first positional arg.

  Existing numbered parameters 1..10 are shifted up by one and the
  canonicalized form is inserted as 1=. ``text`` is parsed wikicode and is
  modified in place.

  Returns ``(text, changelog)``.
  """
  pagetitle = page.title()
  msg("Processing page %s" % pagetitle)
  actions_taken = []

  for template in text.filter_templates():
    if not (template.name == "ar-verb"):
      continue
    before = unicode(template)
    form = getparam(template, "form")
    if form:
      # In order to keep in the same order, just forcibly change the
      # param "names" (numbers). Walk downward so a renamed parameter never
      # collides with the next one.
      for pno in xrange(10, 0, -1):
        key = str(pno)
        if template.has(key):
          template.get(key).name = str(pno + 1)
      # Make sure form= param is first ...
      template.remove("form")
      canon = canonicalize_form(form)
      first_name = template.params[0].name if len(template.params) > 0 else None
      addparam(template, "form", canon, before=first_name)
      # ... then forcibly change its name to 1=
      template.get("form").name = "1"
      template.get("1").showkey = False
    after = unicode(template)
    if before != after:
      msg("Replacing %s with %s" % (before, after))
    if re.match("^[1I](-|$)", form):
      action = "form=%s (%s/%s)" % (form,
        getparam(template, "2"), getparam(template, "3"))
    else:
      action = "form=%s" % form
    actions_taken.append(action)

  changelog = "ar-verb: form= -> 1= and canonicalize to Roman numerals, move other params up: %s" % '; '.join(actions_taken)
  if actions_taken:
    msg("Change log = %s" % changelog)
  return text, changelog
Example #11
0
 def fix_quote_usenet_params(t):
   """Rename legacy quote-usenet style parameters on template ``t`` in place.

   monthday= and year= are merged into date=, then a fixed list of old
   parameter names is moved to the new names.

   Returns True when the template's text changed.
   """
   before = unicode(t)
   monthday = getparam(t, "monthday").strip()
   year = getparam(t, "year").strip()
   if monthday and year:
     if getparam(t, "date"):
       pagemsg("WARNING: Would set date= but is already present: %s"
           % unicode(t))
     else:
       rmparam(t, "date") # in case of blank param
       date_param = t.get("monthday")
       date_param.name = "date"
       # Numeric "M/D" dates get the year joined with a slash, anything else
       # with a space.
       separator = "/" if re.search("^[0-9]+/[0-9]+$", monthday) else " "
       date_param.value = "%s%s%s" % (monthday, separator, year)
       rmparam(t, "year")
       pagemsg("monthday/year -> date")
   renames = [
     ("group", "newsgroup"),
     ("text", "passage"),
     ("6", "passage"),
     ("5", "url"),
     ("4", "newsgroup"),
     ("3", "title"),
     ("2", "author"),
     ("1", "date"),
   ]
   for old_name, new_name in renames:
     move_param(t, old_name, new_name)
   return before != unicode(t)
  def canonicalize_one_page_verb_form(page, index, text):
    """Canonicalize the verb-form argument of every ``tempname`` template.

    ``tempname`` and ``formarg`` are free variables (presumably supplied by
    an enclosing scope that is not visible -- confirm against the caller).
    ``text`` is parsed wikicode and is modified in place.

    Returns ``(text, changelog)``.
    """
    pagetitle = page.title()
    msg("Processing page %s" % pagetitle)
    actions_taken = []

    for template in text.filter_templates():
      if not (template.name == tempname):
        continue
      before = unicode(template)
      form = getparam(template, formarg)
      if form:
        addparam(template, formarg, canonicalize_form(form))
      after = unicode(template)
      if before != after:
        msg("Replacing %s with %s" % (before, after))
      if re.match("^[1I](-|$)", form):
        # For form I, also record the two arguments following the form.
        base = int(formarg)
        action = "form=%s (%s/%s)" % (form,
          getparam(template, str(1 + base)),
          getparam(template, str(2 + base)))
      else:
        action = "form=%s" % form
      actions_taken.append(action)

    changelog = "%s: canonicalize form (%s=) to Roman numerals: %s" % (
        tempname, formarg, '; '.join(actions_taken))
    if actions_taken:
      msg("Change log = %s" % changelog)
    return text, changelog
Example #13
0
def process_page(templates, index, page, save=False, verbose=False):
  """Move the plural-stem argument one position to the left.

  For each template named in ``templates``, the value in position 5 (or 4,
  depending on ``arg1_is_stress``) moves to position 4 (or 3). Templates with
  "or", "-", "_" or "join:..." positional values are skipped with a warning.
  The page is saved only when ``save`` is true.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  def unsupported_warning(t):
    # Punt if multi-arg-set, can't handle yet. Returns the warning format
    # for the first offending positional value, or None.
    for param in t.params:
      if param.showkey:
        continue
      val = unicode(param.value)
      if val == "or":
        return "WARNING: Can't handle multi-decl templates: %s"
      if val in ("-", "_") or val.startswith("join:"):
        return "WARNING: Can't handle multi-word templates: %s"
    return None

  if not page.exists():
    pagemsg("WARNING: Page doesn't exist")
    return

  parsed = blib.parse(page)

  should_save = False

  for t in parsed.filter_templates():
    if unicode(t.name) not in templates:
      continue
    before = unicode(t)

    warning = unsupported_warning(t)
    if warning:
      pagemsg(warning % unicode(t))
      continue

    if arg1_is_stress(getparam(t, "1")):
      oldplarg, newplarg = "5", "4"
    else:
      oldplarg, newplarg = "4", "3"

    plstem = getparam(t, oldplarg)
    if not plstem:
      continue
    if getparam(t, newplarg):
      pagemsg("WARNING: Something wrong, found args in both positions %s and %s: %s" %
          (newplarg, oldplarg, unicode(t)))
      continue
    rmparam(t, oldplarg)
    t.add(newplarg, plstem)
    should_save = True
    pagemsg("Replacing %s with %s" % (before, unicode(t)))

  if not should_save:
    return

  comment = "Move plstem from 5th/4th argument to 4th/3rd"
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = unicode(parsed)
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)
def process_template(pagetitle, index, template, ruparam, trparam, output_line,
    find_accents, verbose):
  """Look up accents for one template parameter and update it in place.

  ``ruparam`` is either a parameter name, the special string "page title"
  (the value is then taken from ``pagetitle``), or a two-element list
  ``[ruparam, saveparam]`` naming the parameter to read and the one to write.
  ``trparam`` names the transliteration parameter and may be false-y.
  ``output_line`` is called with a short status string.

  Returns a one-element list holding a changelog string when the template
  was changed, otherwise False. {{head}} templates are never touched.
  """
  origt = unicode(template)
  saveparam = ruparam
  def pagemsg(text):
    msg("Page %s %s: %s" % (index, pagetitle, text))
  # NOTE(review): semi_verbose is not a parameter of this function;
  # presumably a module-level flag -- confirm.
  def expand_text(tempcall):
    return blib.expand_text(tempcall, pagetitle, pagemsg, semi_verbose)
  if semi_verbose:
    pagemsg("Processing template: %s" % unicode(template))
  if unicode(template.name) == "head":
    # Skip {{head}}. We don't want to mess with headwords.
    return False
  if isinstance(ruparam, list):
    ruparam, saveparam = ruparam
  if ruparam == "page title":
    val = pagetitle
  else:
    val = getparam(template, ruparam)
  valtr = getparam(template, trparam) if trparam else ""
  changed = False
  if find_accents:
    newval, newtr = find_accented(val, valtr, verbose, pagemsg, expand_text,
        origt)
    if newval != val or newtr != valtr:
      # Refuse any lookup result that differs in more than accent marks.
      if ru.remove_accents(newval) != ru.remove_accents(val):
        pagemsg("WARNING: Accented page %s changed from %s in more than just accents, not changing" % (newval, val))
      else:
        changed = True
        addparam(template, saveparam, newval)
        if newtr:
          if not trparam:
            pagemsg("WARNING: Unable to change translit to %s because no translit param available (Cyrillic param %s): %s" %
                (newtr, saveparam, origt))
          elif unicode(template.name) in ["ru-ux"]:
            pagemsg("WARNING: Not changing or adding translit param %s=%s to ru-ux: origt=%s" % (
              trparam, newtr, origt))
          else:
            if valtr and valtr != newtr:
              pagemsg("WARNING: Changed translit param %s from %s to %s: origt=%s" %
                  (trparam, valtr, newtr, origt))
            if not valtr:
              pagemsg("NOTE: Added translit param %s=%s to template: origt=%s" %
                  (trparam, newtr, origt))
            addparam(template, trparam, newtr)
        elif valtr:
          pagemsg("WARNING: Template has translit %s but lookup result has none, leaving translit alone: origt=%s" %
              (valtr, origt))
        if check_need_accent(newval):
          output_line("Need accents (changed)")
        else:
          output_line("Found accents")
  if not changed and check_need_accent(val):
    output_line("Need accents")
  if changed:
    pagemsg("Replaced %s with %s" % (origt, unicode(template)))
  # newval/newtr are only bound when find_accents is true, which is also the
  # only path that can set changed.
  return ["auto-accent %s%s" % (newval, "//%s" % newtr if newtr else "")] if changed else False
def process_page(index, page, save, verbose):
  """Disabled script: logs a warning and returns immediately.

  The code after the early return is unreachable and kept for reference. It
  moved arg 6 to arg 4 on type-6a {{ru-conj}} templates and arg 7 to arg 6
  on type-7b ones.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    if unicode(t.name) == "ru-conj":
      conjtype = getparam(t, "1")
      if conjtype.startswith("6a"):
        param6 = getparam(t, "6")
        if param6:
          rmparam(t, "6")
          if not getparam(t, "5"):
            rmparam(t, "5")
          # Make sure positions 1..3 exist before writing position 4.
          for pos in xrange(1, 4):
            key = str(pos)
            if not t.has(key):
              t.add(key, "")
          t.add("4", param6)
          notes.append("move type 6a arg6 -> arg4")
      if conjtype.startswith("7b"):
        param7 = getparam(t, "7")
        if param7:
          rmparam(t, "7")
          # Make sure positions 1..5 exist before writing position 6.
          for pos in xrange(1, 6):
            key = str(pos)
            if not t.has(key):
              t.add(key, "")
          t.add("6", param7)
          notes.append("move type 7b arg7 -> arg6")
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)

  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)
Example #16
0
def process_page(index, page, save, verbose):
  """Disabled script: logs a warning and returns immediately.

  The code after the early return is unreachable and kept for reference. It
  compared the past-tense overrides of {{ru-conj-7a}} / {{ru-conj-7b}}
  against the forms expected from the past stem and logged the result.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    if unicode(t.name) in ["ru-conj-7a", "ru-conj-7b"]:
      past_stem = getparam(t, "4")
      vowel_end = re.search(u"[аэыоуяеиёю́]$", past_stem)
      past_m = getparam(t, "past_m")
      past_f = getparam(t, "past_f")
      past_n = getparam(t, "past_n")
      past_pl = getparam(t, "past_pl")
      if past_m or past_f or past_n or past_pl:
        upast_stem = ru.make_unstressed(past_stem)
        expected_past_m = past_stem + (u"л" if vowel_end else "")
        expected_past_f = upast_stem + u"ла́"
        expected_past_n = upast_stem + u"ло́"
        expected_past_pl = upast_stem + u"ли́"
        # A missing past_m counts as matching; the other three must be equal.
        masc_ok = not past_m or expected_past_m == past_m
        others_ok = ((past_f, past_n, past_pl) ==
            (expected_past_f, expected_past_n, expected_past_pl))
        if masc_ok and others_ok:
          msg("Would remove past overrides and add arg5=b")
        else:
          msg("WARNING: Remaining past overrides: past_m=%s, past_f=%s, past_n=%s, past_pl=%s, expected_past_m=%s, expected_past_f=%s, expected_past_n=%s, expected_past_pl=%s" %
              (past_m, past_f, past_n, past_pl, expected_past_m, expected_past_f, expected_past_n, expected_past_pl))
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)

  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)
def process_page(index, page, save, verbose):
  """Remove g=, g2=, g3= and g4= from {{head|ru|adjective form}} templates.

  Only the ==Russian== section is edited. Pages with a colon in the title or
  with more than one Russian section are skipped. The page is saved only
  when ``save`` is true.
  """
  pagetitle = unicode(page.title())
  # Not read anywhere in this function.
  subpagetitle = re.sub("^.*:", "", pagetitle)
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  if ":" in pagetitle:
    pagemsg("WARNING: Colon in page title, skipping page")
    return

  text = unicode(page.text)
  notes = []

  # Odd indices hold the level-2 headers, even indices the section bodies.
  sections = re.split("(^==[^=]*==\n)", text, 0, re.M)
  seen_russian = False

  for j in xrange(2, len(sections), 2):
    if sections[j-1] != "==Russian==\n":
      continue
    if seen_russian:
      pagemsg("WARNING: Found multiple Russian sections, skipping page")
      return
    seen_russian = True

    # Remove gender from adjective forms
    parsed = blib.parse_text(sections[j])
    for t in parsed.filter_templates():
      is_adj_form = (unicode(t.name) == "head" and getparam(t, "1") == "ru"
          and getparam(t, "2") == "adjective form")
      if not is_adj_form:
        continue
      before = unicode(t)
      for gender_param in ("g", "g2", "g3", "g4"):
        rmparam(t, gender_param)
      after = unicode(t)
      if before != after:
        pagemsg("Replaced %s with %s" % (before, after))
        notes.append("remove gender from adjective forms")
    sections[j] = unicode(parsed)
  new_text = "".join(sections)

  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(blib.group_notes(notes))
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)
Example #18
0
def process_page(index, page, save, verbose):
  """Fold the past_m= override of class-8 {{ru-conj}} templates into a
  positional argument.

  past_m= is removed; depending on how it compares with the stem in arg 3 it
  is dropped as redundant, written to arg 3, or written to arg 5. Templates
  with an "or" value are skipped. The page is saved only when ``save`` is
  true.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    param2 = getparam(t, "2")
    if unicode(t.name) == "ru-conj" and re.search(r"^8[ab]", param2):
      if any(unicode(x.value) == "or" for x in t.params):
        pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
        continue
      past_m = getparam(t, "past_m")
      if past_m:
        rmparam(t, "past_m")
        stem = getparam(t, "3")
        is_plain_8b = param2.startswith("8b") and not param2.startswith("8b/")
        if stem == past_m:
          pagemsg("Stem %s and past_m same" % stem)
          notes.append("remove redundant past_m %s" % past_m)
        elif is_plain_8b and ru.make_unstressed(past_m) == stem:
          pagemsg("Class 8b/b and stem %s is unstressed version of past_m %s, replacing stem with past_m" % (
            stem, past_m))
          t.add("3", past_m)
          notes.append("moving past_m %s to arg 3" % past_m)
        else:
          pagemsg("Stem %s and past_m %s are different, putting past_m in param 5" % (
            stem, past_m))
          t.add("5", past_m)
          notes.append("moving past_m %s to arg 5" % past_m)
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)

  if new_text == text:
    return
  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (text, new_text))
  assert notes
  comment = "; ".join(notes)
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)
Example #19
0
 def fix_page_params(t):
   """Tidy page= / pages= on template ``t`` in place.

   A leading "p." or "pp." is stripped from both parameters; a purely numeric
   pages= is moved to page=, and a page= ending in a dash is moved to pages=.

   Returns True when the template's text changed.
   """
   before = unicode(t)
   for name in ["page", "pages"]:
     current = getparam(t, name)
     if not re.search(r"^\s*pp?\.\s*", current):
       continue
     # Keep any leading whitespace, drop the prefix and the space after it.
     stripped = re.sub(r"^(\s*)pp?\.\s*", r"\1", current)
     t.add(name, stripped)
     note = "remove p(p). from %s=" % name
     notes.append(note)
     pagemsg(note)
   pages_value = getparam(t, "pages").strip()
   if re.search(r"^[0-9]+$", pages_value):
     move_param(t, "pages", "page")
   page_value = getparam(t, "page").strip()
   if re.search(r"^[0-9]+[-–—]$", page_value):
     move_param(t, "page", "pages")
   return before != unicode(t)
Example #20
0
def vocalize_param(pagetitle, index, template, param, paramtr):
  """Vocalize one template parameter using its transliteration.

  Returns False when ``param`` is empty, the vocalized text when
  ``do_vocalize_param`` produced one (the template is then updated in place),
  and True in every other case.
  """
  arabic = getparam(template, param)
  latin = getparam(template, paramtr)
  if not arabic:
    return False
  if not latin:
    return True

  vocalized = do_vocalize_param(pagetitle, index, template, param, arabic, latin)
  if not vocalized:
    return True

  before = "%s" % unicode(template)
  addparam(template, param, vocalized)
  msg("Page %s %s: Replaced %s with %s" % (index, pagetitle,
    before, unicode(template)))
  return vocalized
def process_page(index, page, save, verbose):
  """Remove the explicit "Russian terms with audio links" category from
  pages that contain an {{audio}} template with lang=ru.

  The page is saved only when ``save`` is true; ``verbose`` is not used.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  text = unicode(page.text)
  parsed = blib.parse(page)

  has_ru_audio = any(
    unicode(t.name) == "audio" and getparam(t, "lang") == "ru"
    for t in parsed.filter_templates())
  if not has_ru_audio:
    return

  # Replace the category link and the newlines around it with one blank line.
  new_text = re.sub(r"\n*\[\[Category:Russian terms with audio links]]\n*", "\n\n", text)
  if new_text == text:
    return

  comment = "Remove redundant [[:Category:Russian terms with audio links]]"
  if save:
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
  else:
    pagemsg("Would save with comment = %s" % comment)
  def process_new_style_headword(htemp):
    """Split the numbered params of ``htemp`` into arg sets and hand each one
    to ``process_arg_set``.

    An arg set ends at a value of "or", "_", "-" or one starting with
    "join:", and after the highest numbered parameter.
    """
    # Split out the arg sets in the declension and check the
    # lemma of each one, taking care to handle cases where there is no lemma
    # (it would default to the page name).
    numbered = [int(unicode(p.name)) for p in htemp.params
        if re.search("^[0-9]+$", unicode(p.name))]
    highest_numbered_param = max([0] + numbered)

    # Now split based on arg sets. The range runs one past the last numbered
    # param so that the final arg set is flushed too.
    separators = ["or", "_", "-"]
    arg_set = []
    for i in xrange(1, highest_numbered_param + 2):
      val = getparam(htemp, str(i))
      at_boundary = (i == highest_numbered_param + 1 or val in separators or
          re.search("^join:", val))
      if at_boundary:
        process_arg_set(arg_set)
        arg_set = []
      else:
        arg_set.append(val)
Example #23
0
def process_page(index, page, save, verbose, direc):
    """Add inanimacy to neuter genders on {{ru-noun+}} / {{ru-noun-table}}.

    NOTE(review): this function looks unfinished. It contains a bare
    ``FIXME`` name, the helper ``frob_gender_param`` is defined but never
    called, and ``fix_indeclinable`` is not defined in the visible code.
    ``direc`` is not used in the visible code either.

    The page is saved only when ``save`` is true.
    """
    pagetitle = unicode(page.title())
    # Not read anywhere in this function.
    subpagetitle = re.sub(".*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    notes = []
    text = unicode(page.text)
    parsed = blib.parse(page)

    # Rewrites a gender value of "n" to "n-in" and "n-p" to "n-in-p".
    def frob_gender_param(t, param):
        val = getparam(t, param)
        if val == "n":
            t.add(param, "n-in")
        elif val == "n-p":
            t.add(param, "n-in-p")

    for t in parsed.filter_templates():
        if unicode(t.name) in ["ru-noun+", "ru-noun-table"]:
            origt = unicode(t)
            # Give up on the whole page if any parameter is not named "1".
            for param in t.params:
                if unicode(param.name) != "1":
                    pagemsg("WARNING: Found other than a single param in template, skipping: %s" % unicode(t))
                    return
            # NOTE(review): placeholder left by the author; as written this
            # bare name is evaluated when reached -- the intended edit of t is
            # missing here.
            FIXME
            if origt != unicode(t):
                param3 = getparam(t, "3")
                if param3 != "-":
                    if fix_indeclinable:
                        if param3:
                            pagemsg("WARNING: Can't make indeclinable, has genitive singular given: %s" % origt)
                            return
                        else:
                            t.add("3", "-")
                            notes.append("make indeclinable")
                            pagemsg("Making indeclinable: %s" % unicode(t))
                    else:
                        pagemsg("WARNING: Would add inanimacy to neuter, but isn't marked as indeclinable: %s" % origt)
                        return
                pagemsg("Replacing %s with %s" % (origt, unicode(t)))

    new_text = unicode(parsed)

    if new_text != text:
        if verbose:
            pagemsg("Replacing <%s> with <%s>" % (text, new_text))
        if notes:
            comment = "Add inanimacy to neuters (%s)" % "; ".join(notes)
        else:
            comment = "Add inanimacy to neuters"
        if save:
            pagemsg("Saving with comment = %s" % comment)
            page.text = new_text
            page.save(comment=comment)
        else:
            pagemsg("Would save with comment = %s" % comment)
Example #24
0
def infer_one_page_decls_1(page, index, text):
  """Convert adjective declension templates on a page to the new form.

  For every template named in ``decl_templates``, ``infer_decl`` is asked
  for new arguments. When it returns none, stem and declension are merely
  combined and trailing blank arguments removed, and the result is checked
  with ``compare_results``. Otherwise the numbered and short_* parameters
  are replaced by the inferred arguments. ``text`` is parsed wikicode and is
  modified in place.

  Returns ``(text, changelog)``, or ``(None, None)`` when
  ``compare_results`` rejects a rewritten template.
  """
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, unicode(page.title()), txt))
  for tempname in decl_templates:
    for t in text.filter_templates():
      if unicode(t.name).strip() == tempname:
        orig_template = unicode(t)
        args = infer_decl(t, pagemsg)
        if not args:
          # At least combine stem and declension, blanking decl when possible.
          stem, decl = combine_stem(getparam(t, "1"), getparam(t, "2"))
          t.add("1", stem)
          t.add("2", decl)
          # Remove any trailing blank arguments.
          for i in xrange(15, 0, -1):
            if not getparam(t, i):
              rmparam(t, i)
            else:
              break
          new_template = unicode(t)
          if orig_template != new_template:
            if not compare_results(orig_template, new_template, pagemsg):
              return None, None
        else:
          # Start from a clean slate: drop all numbered and short_* params.
          for i in xrange(15, 0, -1):
            rmparam(t, i)
          rmparam(t, "short_m")
          rmparam(t, "short_f")
          rmparam(t, "short_n")
          rmparam(t, "short_p")
          t.name = tempname
          i = 1
          for arg in args:
            if "=" in arg:
              # Split on the first "=" only, so a value that itself contains
              # "=" does not break the two-way unpacking.
              name, value = arg.split("=", 1)
              t.add(name, value)
            else:
              t.add(i, arg)
              i += 1
          new_template = unicode(t)
        if orig_template != new_template:
          if verbose:
            pagemsg("Replacing %s with %s" % (orig_template, new_template))

  return text, "Convert adj decl to new form and infer short-accent pattern"
def process_page(index, page, save, verbose):
  """Disabled page processor for the ru-conj class 6 -> 6° conversion.

  As written, the function only logs a warning that the script no longer
  applies and returns None. Everything below the early return is
  unreachable and is kept for reference.

  index: page index, used in log messages.
  page: page object (`title()`, `text`, `save()` are used).
  save: when true, the unreachable tail would save the page.
  verbose: when true, the unreachable tail would log the full text diff.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("WARNING: Script no longer applies and would need fixing up")
  return

  # ---- Unreachable from here on (see the early return above). ----
  pagemsg("Processing")

  old_text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    before = unicode(t)
    conjtype = getparam(t, "1")
    if unicode(t.name) == "ru-conj":
      if re.search(r"^6[ac]", conjtype):
        # 6a/6c are only rewritten when no_iotation was set.
        if getparam(t, "no_iotation"):
          rmparam(t, "no_iotation")
          notes.append(
            u"6a + no_iotation -> 6°a" if conjtype.startswith("6a")
            else u"6c + no_iotation -> 6°c")
          t.add("1", re.sub("^6", u"6°", conjtype))
      elif re.search(r"^6b", conjtype):
        # 6b is rewritten unconditionally.
        notes.append(u"6b -> 6°b")
        t.add("1", re.sub("^6", u"6°", conjtype))
    after = unicode(t)
    if before != after:
      pagemsg("Replaced %s with %s" % (before, after))

  new_text = unicode(parsed)
  if new_text == old_text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (old_text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
  else:
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
 def check_for_al(param):
   """Re-store headword param `param` after passing its value through remove_al.

   Does nothing when the param has no value. If the value contains any of
   the characters "[", "]" or "|", a message is logged, a note is recorded
   and the value is passed through remove_links first.

   NOTE(review): remove_links is also applied to the parameter *name*
   before the lookup -- confirm that this is intended.
   """
   param = remove_links(param)
   current = getparam(headword_template, param)
   if not current:
     return
   contains_link_chars = any(ch in current for ch in "[]|")
   if contains_link_chars:
     pagemsg("Param %s value %s has link in it" % (param, current))
     add_note("removed links from %s" % param)
     current = remove_links(current)
   putp(param, remove_al(current))
def process_page(index, page, save, verbose):
  """Set blank participle overrides to "-" on class 7a/7b conjugations.

  For each ru-conj / ru-conj-old template whose param 2 is "7a" or "7b",
  the params past_adv_part_short and past_actv_part are set to "-" when
  they are present but empty. Templates having any param whose value is
  "or" are skipped with a warning.

  index: page index, used in log messages.
  page: page object (`title()`, `text`, `save()` are used).
  save: when true the page is saved, otherwise only the comment is logged.
  verbose: when true the full before/after page text is logged.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  old_text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  # (param name, changelog note) pairs, handled in this order.
  blank_fixes = (
    ("past_adv_part_short", "set past_adv_part_short=-"),
    ("past_actv_part", "set past_actv_part=-"),
  )
  for t in parsed.filter_templates():
    if unicode(t.name) not in ["ru-conj", "ru-conj-old"]:
      continue
    if getparam(t, "2") not in ["7a", "7b"]:
      continue
    if any(unicode(p.value) == "or" for p in t.params):
      pagemsg("WARNING: Skipping multi-arg conjugation: %s" % unicode(t))
      continue
    for param, note in blank_fixes:
      # Only touch params that are explicitly present but empty.
      if t.has(param) and getparam(t, param) == "":
        notes.append(note)
        before = unicode(t)
        t.add(param, "-")
        pagemsg("Replacing %s with %s" % (before, unicode(t)))

  new_text = unicode(parsed)

  if new_text != old_text:
    if verbose:
      pagemsg("Replacing <%s> with <%s>" % (old_text, new_text))
    assert notes
    comment = "; ".join(notes)
    if not save:
      pagemsg("Would save with comment = %s" % comment)
    else:
      pagemsg("Saving with comment = %s" % comment)
      page.text = new_text
      page.save(comment=comment)

  if not notes:
    pagemsg("WARNING: No changes")
 def get_form_class(k):
   """Return the verb form class found in etymology section `k`.

   Parses `etymologies[k]` and reads param 1 of each ar-verb / ar-verb-form
   template. The value from the last such template is returned; None is
   returned when there is no such template. A warning is logged when two
   templates in the section carry different non-empty classes.
   """
   formclass = None
   # Index by the argument. The body previously read etymologies[j] from
   # the enclosing scope, so the parameter was ignored and every call
   # inspected the same section.
   parsed = blib.parse_text(etymologies[k])
   for t in parsed.filter_templates():
     if t.name in ["ar-verb", "ar-verb-form"]:
       newformclass = getparam(t, "1")
       if formclass and newformclass and formclass != newformclass:
         pagemsg("WARNING: Something wrong: Two different verb form classes in same etymology: %s != %s" % (formclass, newformclass))
       formclass = newformclass
   return formclass
Example #29
0
def process_page(index, page, save, verbose):
  """Convert head= to 1= in ru-phrase templates on one page.

  A warning is logged for any ru-phrase template carrying tr=, and for any
  carrying both head= and 1= (the latter are left unchanged). Otherwise
  head= is removed and its value stored as 1=; a non-empty tr= is removed
  and added back after 1= has been added.

  index: page index, used in log messages.
  page: page object (`title()`, `text`, `save()` are used).
  save: when true the page is saved, otherwise only the comment is logged.
  verbose: when true the full before/after page text is logged.
  """
  pagetitle = unicode(page.title())
  def pagemsg(txt):
    msg("Page %s %s: %s" % (index, pagetitle, txt))

  pagemsg("Processing")

  old_text = unicode(page.text)
  parsed = blib.parse(page)
  notes = []
  for t in parsed.filter_templates():
    if unicode(t.name) != "ru-phrase":
      continue
    if t.has("tr"):
      pagemsg("WARNING: Has tr=: %s" % unicode(t))
    if not t.has("head"):
      continue
    if t.has("1"):
      pagemsg("WARNING: Has both head= and 1=: %s" % unicode(t))
      continue
    notes.append("ru-phrase: convert head= to 1=")
    before = unicode(t)
    head = getparam(t, "head")
    tr = getparam(t, "tr")
    rmparam(t, "head")
    rmparam(t, "tr")
    t.add("1", head)
    # tr= is re-added only when non-empty -- presumably so that it
    # follows the new 1=.
    if tr:
      t.add("tr", tr)
    pagemsg("Replacing %s with %s" % (before, unicode(t)))

  new_text = unicode(parsed)
  if new_text == old_text:
    return

  if verbose:
    pagemsg("Replacing <%s> with <%s>" % (old_text, new_text))
  assert notes
  comment = "; ".join(notes)
  if not save:
    pagemsg("Would save with comment = %s" % comment)
  else:
    pagemsg("Saving with comment = %s" % comment)
    page.text = new_text
    page.save(comment=comment)
Example #30
0
 def fetch_numbered_params(t):
   """Return params 1-9 of template `t` as a list, trailing blanks removed.

   Missing or empty params are represented by "". Blank entries that are
   followed by a non-blank one are kept, so list positions still match
   the param numbers.
   """
   values = [getparam(t, str(num)) or "" for num in xrange(1, 10)]
   # Strip blanks from the end only.
   while values and not values[-1]:
     values.pop()
   return values