コード例 #1
0
  def find_decl_args(lemma, infl, wordind):
    """Work out declension arguments for one word by inspecting its lemma page.

    The page titled LEMMA is fetched and scanned for declension templates
    (ru-noun-table, ru-decl-adj), headword templates (ru-noun,
    ru-proper noun) and old-style ru-decl-noun-z templates.  A lone
    ru-decl-noun-z template is converted to ru-noun-table form when no
    regular declension template is present.

    Parameters:
      lemma: title of the page to look up.
      infl: the word as it should be displayed; it is linked directly when it
        equals LEMMA after accent removal, otherwise as [[lemma|infl]].
      wordind: index of the word, used only in log messages.

    Returns:
      A 4-tuple whose first item is a list of (param name, value) pairs and
      whose second item is True for the adjectival results and False for the
      indeclinable result; the last two items are None in every return
      visible here.  None is returned when the word has to be skipped (a
      warning is logged first).
      NOTE(review): this excerpt ends after the ru-decl-adj case, so for any
      other selected template the function falls off the end and returns
      None -- confirm against the full file.
    """
    declpage = pywikibot.Page(site, lemma)
    if ru.remove_accents(infl) == lemma:
      wordlink = "[[%s]]" % infl
    else:
      wordlink = "[[%s|%s]]" % (lemma, infl)

    if not declpage.exists():
      if lemma in is_short_adj or re.search(u"(ий|ый|ой)$", lemma):
        pagemsg("WARNING: Page doesn't exist, assuming word #%s adjectival: lemma=%s, infl=%s" %
            (wordind, lemma, infl))
        return [("1", wordlink), ("2", "+")], True, None, None
      pagemsg("WARNING: Page doesn't exist, can't locate decl for word #%s, skipping: lemma=%s, infl=%s" %
          (wordind, lemma, infl))
      return None

    parsed = blib.parse_text(declpage.text)
    decl_templates = []
    headword_templates = []
    decl_z_templates = []
    # The three name sets are disjoint, so a template lands in at most one list.
    for t in parsed.filter_templates():
      tname = unicode(t.name)
      if tname in ["ru-noun-table", "ru-decl-adj"]:
        pagemsg("find_decl_args: Found decl template: %s" % unicode(t))
        decl_templates.append(t)
      elif tname in ["ru-noun", "ru-proper noun"]:
        pagemsg("find_decl_args: Found headword template: %s" % unicode(t))
        headword_templates.append(t)
      elif tname in ["ru-decl-noun-z"]:
        pagemsg("find_decl_args: Found z-decl template: %s" % unicode(t))
        decl_z_templates.append(t)

    if not decl_templates:
      if decl_z_templates:
        # {{ru-decl-noun-z|звезда́|f-in|d|ё}}
        # {{ru-decl-noun-z|ёж|m-inan|b}}
        if len(decl_z_templates) > 1:
          pagemsg("WARNING: Multiple decl-z templates during decl lookup for word #%s, skipping: lemma=%s, infl=%s" %
            (wordind, lemma, infl))
          return None
        decl_z_template = decl_z_templates[0]
        # The headword template is only passed to the converter when it is
        # unambiguous, i.e. exactly one was found on the page.
        headword_template = None
        pagemsg("find_decl_args: Using z-decl template: %s" %
            unicode(decl_z_template))
        if len(headword_templates) == 0:
          pagemsg("WARNING: find_decl_args: No headword templates for use with z-decl template conversion during decl lookup for word #%s: lemma=%s, infl=%s, zdecl=%s" %
              (wordind, lemma, infl, unicode(decl_z_template)))
        elif len(headword_templates) > 1:
          pagemsg("WARNING: find_decl_args: Multiple headword templates for use with z-decl template conversion during decl lookup for word #%s, ignoring: lemma=%s, infl=%s, zdecl=%s" %
              (wordind, lemma, infl, unicode(decl_z_template)))
        else:
          headword_template = headword_templates[0]
          pagemsg("find_decl_args: For word #%s, lemma=%s, infl=%s, using headword template %s for use with z-decl template %s" %
              (wordind, lemma, infl, unicode(headword_template),
                unicode(decl_z_template)))
        decl_template = runoun.convert_zdecl_to_ru_noun_table(decl_z_template,
            subpagetitle, pagemsg, headword_template=headword_template)
        if not decl_template:
          # The converter can come back empty-handed; without this check the
          # code below would fail on `decl_template.name` instead of skipping.
          pagemsg("WARNING: find_decl_args: Unable to convert z-decl template during decl lookup for word #%s, skipping: lemma=%s, infl=%s, zdecl=%s" %
              (wordind, lemma, infl, unicode(decl_z_template)))
          return None
        decl_templates = [decl_template]

      elif ("[[Category:Russian indeclinable nouns]]" in declpage.text or
          any(getparam(x, "3") == "-" for x in headword_templates)):
        return [("1", wordlink), ("2", "$")], False, None, None
      else:
        pagemsg("WARNING: No decl template during decl lookup for word #%s, skipping: lemma=%s, infl=%s" %
            (wordind, lemma, infl))
        return None

    if len(decl_templates) == 1:
      decl_template = decl_templates[0]
    else:
      # Multiple decl templates: prefer the first adjectival one when the
      # lemma itself has an adjective-like ending.
      decl_template = None
      if re.search(u"(ий|ый|ой)$", lemma):
        for t in decl_templates:
          if unicode(t.name) == "ru-decl-adj":
            decl_template = t
            break
      if decl_template is not None:
        pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s, assuming adjectival: lemma=%s, infl=%s" %
          (wordind, lemma, infl))
      elif lemma in use_given_decl:
        overriding_decl = use_given_decl[lemma]
        pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, using overriding declension %s: lemma=%s, infl=%s" %
            (wordind, overriding_decl, lemma, infl))
        decl_template = blib.parse_text(overriding_decl).filter_templates()[0]
      elif pagetitle in use_given_page_decl:
        overriding_decl = use_given_page_decl[pagetitle].get(lemma, None)
        if not overriding_decl:
          pagemsg("WARNING: Missing entry for ambiguous-decl lemma for word #%s, skipping: lemma=%s, infl=%s" %
            (wordind, lemma, infl))
          return None
        pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, using overriding declension %s: lemma=%s, infl=%s" %
            (wordind, overriding_decl, lemma, infl))
        decl_template = blib.parse_text(overriding_decl).filter_templates()[0]
      else:
        pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, skipping: lemma=%s, infl=%s" %
            (wordind, lemma, infl))
        return None

    pagemsg("find_decl_args: Using decl template: %s" % unicode(decl_template))
    if unicode(decl_template.name) == "ru-decl-adj":
      # Same pattern as the former ur"..." literal, spelled with escaped
      # backslashes so the literal is also accepted by Python 3 parsers.
      if re.search(u"\\bь\\b", getparam(decl_template, "2"), re.U):
        return [("1", wordlink), ("2", u"+ь")], True, None, None
      else:
        return [("1", wordlink), ("2", "+")], True, None, None
コード例 #2
0
def process_page(page, index, parsed):
    """Replace each {{ru-decl-noun-z}} template on a page with {{ru-noun-table}}.

    Every ru-decl-noun-z template is converted with
    runounlib.convert_zdecl_to_ru_noun_table.  When the page has exactly one
    old-style headword template (ru-noun / ru-proper noun), the proposed
    declension is first expanded through {{ru-generate-noun-args}} and
    checked against that headword; a template that fails conversion,
    expansion or the check is left untouched.  With no usable headword
    template the conversion is applied without that check.

    Parameters:
        page: page object; its title and content are read.
        index: page index, used only as a prefix in log messages.
        parsed: ignored -- the page is re-parsed with blib.parse(page).

    Returns:
        None when the page title contains a colon (the page is skipped);
        otherwise a (new page text, change comment) tuple, returned even
        when no template was replaced.
    """
    pagetitle = unicode(page.title())
    subpagetitle = re.sub(".*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    if ":" in pagetitle:
        pagemsg("WARNING: Colon in page title, skipping")
        return

    def expand_text(tempcall):
        # `args` is the module-level options object.  It is only read here
        # and must never be rebound inside this function, otherwise later
        # calls would look up `.verbose` on the wrong value.
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

    parsed = blib.parse(page)

    headword_templates = [
        t for t in parsed.filter_templates()
        if unicode(t.name) in ["ru-noun", "ru-proper noun"]
    ]

    # Only an unambiguous (single) headword template is used for checking.
    headword_template = None
    if len(headword_templates) > 1:
        pagemsg(
            "WARNING: Multiple old-style headword templates, not sure which one to use, using none"
        )
        for ht in headword_templates:
            pagemsg("Ignored headword template: %s" % unicode(ht))
    elif len(headword_templates) == 0:
        pagemsg("WARNING: No old-style headword templates")
    else:
        headword_template = headword_templates[0]
        pagemsg("Found headword template: %s" % unicode(headword_template))

    num_z_decl = 0
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-decl-noun-z":
            continue
        num_z_decl += 1
        pagemsg("Found z-decl template: %s" % unicode(t))
        ru_noun_table_template = runounlib.convert_zdecl_to_ru_noun_table(
            t, subpagetitle, pagemsg, headword_template=headword_template)
        if not ru_noun_table_template:
            pagemsg("WARNING: Unable to convert z-decl template: %s" %
                    unicode(t))
            continue

        if headword_template:
            generate_template = re.sub(r"^\{\{ru-noun-table",
                                       "{{ru-generate-noun-args",
                                       unicode(ru_noun_table_template))
            if unicode(headword_template.name) == "ru-proper noun":
                generate_template = re.sub(r"\}\}$", "|ndef=sg}}",
                                           generate_template)

            def pagemsg_with_proposed(text):
                # Log the proposed replacement before the message so that a
                # warning can be read together with the template it concerns.
                pagemsg("Proposed ru-noun-table template: %s" %
                        unicode(ru_noun_table_template))
                pagemsg(text)

            generate_result = expand_text(unicode(generate_template))
            if not generate_result:
                pagemsg_with_proposed(
                    "WARNING: Error generating noun args, skipping")
                continue
            # Kept in a local name: the original assigned this to the global
            # `args`, clobbering the options object read by expand_text().
            generated_args = blib.split_generate_args(generate_result)

            # This will check number mismatch and animacy mismatch
            new_genders = runounlib.check_old_noun_headword_forms(
                headword_template, generated_args, subpagetitle,
                pagemsg_with_proposed)
            if new_genders is None:
                continue

        # Rewrite the existing template node in place so that its position
        # in the parsed page is preserved.
        origt = unicode(t)
        t.name = "ru-noun-table"
        del t.params[:]
        for param in ru_noun_table_template.params:
            t.add(param.name, param.value)
        pagemsg("Replacing z-decl %s with regular decl %s" %
                (origt, unicode(t)))

    if num_z_decl > 1:
        pagemsg("WARNING: Found multiple z-decl templates (%s)" % num_z_decl)

    return unicode(parsed), "Replace ru-decl-noun-z with ru-noun-table"
コード例 #3
0
    def find_decl_args(lemma, infl, wordind):
        """Work out declension arguments for one word from its lemma page.

        The page titled LEMMA is fetched and scanned for declension
        templates (ru-noun-table, ru-decl-adj), headword templates (ru-noun,
        ru-proper noun) and old-style ru-decl-noun-z templates.  A lone
        ru-decl-noun-z template is converted to ru-noun-table form when no
        regular declension template is present.

        Parameters:
            lemma: title of the page to look up.
            infl: the word as it should be displayed; it is linked directly
                when it equals LEMMA after accent removal, otherwise as
                [[lemma|infl]].
            wordind: index of the word, used only in log messages.

        Returns:
            A 4-tuple whose first item is a list of (param name, value)
            pairs and whose second item is True for the adjectival results
            and False for the indeclinable result; the last two items are
            None in every return visible here.  None is returned when the
            word has to be skipped (a warning is logged first).
            NOTE(review): this excerpt ends after the ru-decl-adj case, so
            for any other selected template the function falls off the end
            and returns None -- confirm against the full file.
        """
        declpage = pywikibot.Page(site, lemma)
        if rulib.remove_accents(infl) == lemma:
            wordlink = "[[%s]]" % infl
        else:
            wordlink = "[[%s|%s]]" % (lemma, infl)

        if not declpage.exists():
            if lemma in is_short_adj or re.search(u"(ий|ый|ой)$", lemma):
                pagemsg(
                    "WARNING: Page doesn't exist, assuming word #%s adjectival: lemma=%s, infl=%s"
                    % (wordind, lemma, infl))
                return [("1", wordlink), ("2", "+")], True, None, None
            pagemsg(
                "WARNING: Page doesn't exist, can't locate decl for word #%s, skipping: lemma=%s, infl=%s"
                % (wordind, lemma, infl))
            return None

        parsed = blib.parse_text(declpage.text)
        decl_templates = []
        headword_templates = []
        decl_z_templates = []
        # The three name sets are disjoint, so a template lands in at most
        # one list.
        for t in parsed.filter_templates():
            tname = unicode(t.name)
            if tname in ["ru-noun-table", "ru-decl-adj"]:
                pagemsg("find_decl_args: Found decl template: %s" % unicode(t))
                decl_templates.append(t)
            elif tname in ["ru-noun", "ru-proper noun"]:
                pagemsg("find_decl_args: Found headword template: %s" %
                        unicode(t))
                headword_templates.append(t)
            elif tname in ["ru-decl-noun-z"]:
                pagemsg("find_decl_args: Found z-decl template: %s" %
                        unicode(t))
                decl_z_templates.append(t)

        if not decl_templates:
            if decl_z_templates:
                # {{ru-decl-noun-z|звезда́|f-in|d|ё}}
                # {{ru-decl-noun-z|ёж|m-inan|b}}
                if len(decl_z_templates) > 1:
                    pagemsg(
                        "WARNING: Multiple decl-z templates during decl lookup for word #%s, skipping: lemma=%s, infl=%s"
                        % (wordind, lemma, infl))
                    return None
                decl_z_template = decl_z_templates[0]
                # The headword template is only passed to the converter when
                # it is unambiguous, i.e. exactly one was found on the page.
                headword_template = None
                pagemsg("find_decl_args: Using z-decl template: %s" %
                        unicode(decl_z_template))
                if len(headword_templates) == 0:
                    pagemsg(
                        "WARNING: find_decl_args: No headword templates for use with z-decl template conversion during decl lookup for word #%s: lemma=%s, infl=%s, zdecl=%s"
                        % (wordind, lemma, infl, unicode(decl_z_template)))
                elif len(headword_templates) > 1:
                    pagemsg(
                        "WARNING: find_decl_args: Multiple headword templates for use with z-decl template conversion during decl lookup for word #%s, ignoring: lemma=%s, infl=%s, zdecl=%s"
                        % (wordind, lemma, infl, unicode(decl_z_template)))
                else:
                    headword_template = headword_templates[0]
                    pagemsg(
                        "find_decl_args: For word #%s, lemma=%s, infl=%s, using headword template %s for use with z-decl template %s"
                        %
                        (wordind, lemma, infl, unicode(headword_template),
                         unicode(decl_z_template)))
                decl_template = runounlib.convert_zdecl_to_ru_noun_table(
                    decl_z_template,
                    subpagetitle,
                    pagemsg,
                    headword_template=headword_template)
                if not decl_template:
                    # The converter can come back empty-handed; without this
                    # check the code below would fail on
                    # `decl_template.name` instead of skipping the word.
                    pagemsg(
                        "WARNING: find_decl_args: Unable to convert z-decl template during decl lookup for word #%s, skipping: lemma=%s, infl=%s, zdecl=%s"
                        % (wordind, lemma, infl, unicode(decl_z_template)))
                    return None
                decl_templates = [decl_template]

            elif ("[[Category:Russian indeclinable nouns]]" in declpage.text
                  or any(getparam(x, "3") == "-"
                         for x in headword_templates)):
                return [("1", wordlink), ("2", "$")], False, None, None
            else:
                pagemsg(
                    "WARNING: No decl template during decl lookup for word #%s, skipping: lemma=%s, infl=%s"
                    % (wordind, lemma, infl))
                return None

        if len(decl_templates) == 1:
            decl_template = decl_templates[0]
        else:
            # Multiple decl templates: prefer the first adjectival one when
            # the lemma itself has an adjective-like ending.
            decl_template = None
            if re.search(u"(ий|ый|ой)$", lemma):
                for t in decl_templates:
                    if unicode(t.name) == "ru-decl-adj":
                        decl_template = t
                        break
            if decl_template is not None:
                pagemsg(
                    "WARNING: Multiple decl templates during decl lookup for word #%s, assuming adjectival: lemma=%s, infl=%s"
                    % (wordind, lemma, infl))
            elif lemma in use_given_decl:
                overriding_decl = use_given_decl[lemma]
                pagemsg(
                    "WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, using overriding declension %s: lemma=%s, infl=%s"
                    % (wordind, overriding_decl, lemma, infl))
                decl_template = blib.parse_text(
                    overriding_decl).filter_templates()[0]
            elif pagetitle in use_given_page_decl:
                overriding_decl = use_given_page_decl[pagetitle].get(
                    lemma, None)
                if not overriding_decl:
                    pagemsg(
                        "WARNING: Missing entry for ambiguous-decl lemma for word #%s, skipping: lemma=%s, infl=%s"
                        % (wordind, lemma, infl))
                    return None
                pagemsg(
                    "WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, using overriding declension %s: lemma=%s, infl=%s"
                    % (wordind, overriding_decl, lemma, infl))
                decl_template = blib.parse_text(
                    overriding_decl).filter_templates()[0]
            else:
                pagemsg(
                    "WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, skipping: lemma=%s, infl=%s"
                    % (wordind, lemma, infl))
                return None

        pagemsg("find_decl_args: Using decl template: %s" %
                unicode(decl_template))
        if unicode(decl_template.name) == "ru-decl-adj":
            # Same pattern as the former ur"..." literal, spelled with
            # escaped backslashes so the literal is also accepted by
            # Python 3 parsers.
            if re.search(u"\\bь\\b", getparam(decl_template, "2"), re.U):
                return [("1", wordlink), ("2", u"+ь")], True, None, None
            else:
                return [("1", wordlink), ("2", "+")], True, None, None