def find_decl_args(lemma, infl, wordind):
    """Work out declension arguments for one word by inspecting the lemma's page.

    The page named `lemma` is fetched and scanned for declension templates
    (ru-noun-table, ru-decl-adj), old-style headword templates (ru-noun,
    ru-proper noun) and z-style declension templates (ru-decl-noun-z).

    Parameters:
      lemma: title of the page to look up; also used as the link target.
      infl: the inflected form to display; compared against `lemma` after
        accent removal to decide between "[[infl]]" and "[[lemma|infl]]".
      wordind: index of the word, used only in log messages.

    Returns:
      None when the word has to be skipped (a warning is logged through
      `pagemsg` in every such case), otherwise a 4-tuple
      (params, flag, None, None) where `params` is a list of
      (name, value) pairs. In the code visible here `flag` is True for the
      adjectival results and False for the indeclinable result, and the last
      two members are always None.

    Relies on names provided by the enclosing scope/module: `pagemsg`,
    `pagetitle`, `subpagetitle`, `site`, `is_short_adj`, `use_given_decl`,
    `use_given_page_decl` and `getparam`.
    """
    declpage = pywikibot.Page(site, lemma)
    if ru.remove_accents(infl) == lemma:
        wordlink = "[[%s]]" % infl
    else:
        wordlink = "[[%s|%s]]" % (lemma, infl)

    if not declpage.exists():
        # No page to consult: fall back on the adjectival endings or the
        # explicit short-adjective list.
        if lemma in is_short_adj or re.search(u"(ий|ый|ой)$", lemma):
            pagemsg("WARNING: Page doesn't exist, assuming word #%s adjectival: lemma=%s, infl=%s" % (wordind, lemma, infl))
            return [("1", wordlink), ("2", "+")], True, None, None
        pagemsg("WARNING: Page doesn't exist, can't locate decl for word #%s, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
        return None

    parsed = blib.parse_text(declpage.text)
    decl_templates = []
    headword_templates = []
    decl_z_templates = []
    for t in parsed.filter_templates():
        tname = unicode(t.name)
        if tname in ["ru-noun-table", "ru-decl-adj"]:
            pagemsg("find_decl_args: Found decl template: %s" % unicode(t))
            decl_templates.append(t)
        if tname in ["ru-noun", "ru-proper noun"]:
            pagemsg("find_decl_args: Found headword template: %s" % unicode(t))
            headword_templates.append(t)
        if tname in ["ru-decl-noun-z"]:
            pagemsg("find_decl_args: Found z-decl template: %s" % unicode(t))
            decl_z_templates.append(t)

    if not decl_templates:
        if decl_z_templates:
            # {{ru-decl-noun-z|звезда́|f-in|d|ё}}
            # {{ru-decl-noun-z|ёж|m-inan|b}}
            if len(decl_z_templates) > 1:
                pagemsg("WARNING: Multiple decl-z templates during decl lookup for word #%s, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
                return None
            decl_z_template = decl_z_templates[0]
            headword_template = None
            pagemsg("find_decl_args: Using z-decl template: %s" % unicode(decl_z_template))
            # Only an unambiguous headword template is handed to the converter.
            if not headword_templates:
                pagemsg("WARNING: find_decl_args: No headword templates for use with z-decl template conversion during decl lookup for word #%s: lemma=%s, infl=%s, zdecl=%s" % (wordind, lemma, infl, unicode(decl_z_template)))
            elif len(headword_templates) > 1:
                pagemsg("WARNING: find_decl_args: Multiple headword templates for use with z-decl template conversion during decl lookup for word #%s, ignoring: lemma=%s, infl=%s, zdecl=%s" % (wordind, lemma, infl, unicode(decl_z_template)))
            else:
                headword_template = headword_templates[0]
                pagemsg("find_decl_args: For word #%s, lemma=%s, infl=%s, using headword template %s for use with z-decl template %s" % (wordind, lemma, infl, unicode(headword_template), unicode(decl_z_template)))
            decl_template = runoun.convert_zdecl_to_ru_noun_table(
                decl_z_template, subpagetitle, pagemsg,
                headword_template=headword_template)
            if not decl_template:
                # The converter can fail and hand back a falsy value; without
                # this guard the `.name` access further down would raise.
                pagemsg("WARNING: Unable to convert z-decl template during decl lookup for word #%s, skipping: lemma=%s, infl=%s, zdecl=%s" % (wordind, lemma, infl, unicode(decl_z_template)))
                return None
            decl_templates = [decl_template]
        elif ("[[Category:Russian indeclinable nouns]]" in declpage.text or
              any(getparam(x, "3") == "-" for x in headword_templates)):
            return [("1", wordlink), ("2", "$")], False, None, None
        else:
            pagemsg("WARNING: No decl template during decl lookup for word #%s, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
            return None

    if len(decl_templates) == 1:
        decl_template = decl_templates[0]
    else:
        # Multiple decl templates: prefer an adjectival one when the lemma
        # looks adjectival, otherwise require an explicit override.
        for t in decl_templates:
            if unicode(t.name) == "ru-decl-adj" and re.search(u"(ий|ый|ой)$", lemma):
                pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s, assuming adjectival: lemma=%s, infl=%s" % (wordind, lemma, infl))
                decl_template = t
                break
        else:
            if lemma in use_given_decl:
                overriding_decl = use_given_decl[lemma]
                pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, using overriding declension %s: lemma=%s, infl=%s" % (wordind, overriding_decl, lemma, infl))
                decl_template = blib.parse_text(overriding_decl).filter_templates()[0]
            elif pagetitle in use_given_page_decl:
                overriding_decl = use_given_page_decl[pagetitle].get(lemma, None)
                if not overriding_decl:
                    pagemsg("WARNING: Missing entry for ambiguous-decl lemma for word #%s, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
                    return None
                pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, using overriding declension %s: lemma=%s, infl=%s" % (wordind, overriding_decl, lemma, infl))
                decl_template = blib.parse_text(overriding_decl).filter_templates()[0]
            else:
                pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
                return None

    pagemsg("find_decl_args: Using decl template: %s" % unicode(decl_template))
    if unicode(decl_template.name) == "ru-decl-adj":
        # Same pattern text as the raw literal ur"\bь\b"; written with escaped
        # backslashes because the `ur` prefix is Python 2-only syntax.
        if re.search(u"\\bь\\b", getparam(decl_template, "2"), re.U):
            return [("1", wordlink), ("2", u"+ь")], True, None, None
        return [("1", wordlink), ("2", "+")], True, None, None
    # NOTE(review): any other template type falls through here and the
    # function returns None implicitly -- confirm whether handling for
    # ru-noun-table was meant to follow.
def process_page(page, index, parsed):
    """Rewrite each {{ru-decl-noun-z}} template on a page as {{ru-noun-table}}.

    Parameters:
      page: page object; `page.title()` supplies the title and the page is
        parsed with `blib.parse(page)`.
      index: page index, used only as a prefix in log messages.
      parsed: accepted for interface compatibility; the value passed in is
        discarded and the page is parsed afresh.

    Returns:
      None when the page title contains a colon. Otherwise the tuple
      (unicode(parsed), "Replace ru-decl-noun-z with ru-noun-table"), which is
      returned whether or not any template was actually replaced.

    Reads the module-level `args` (for `args.verbose`) but never assigns it.
    """
    pagetitle = unicode(page.title())
    subpagetitle = re.sub(".*:", "", pagetitle)

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    if ":" in pagetitle:
        pagemsg("WARNING: Colon in page title, skipping")
        return None

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)

    # NOTE(review): the caller-supplied `parsed` is overwritten here, as in
    # the original code -- confirm that re-parsing is intended.
    parsed = blib.parse(page)

    headword_templates = [
        t for t in parsed.filter_templates()
        if unicode(t.name) in ["ru-noun", "ru-proper noun"]
    ]

    # Only an unambiguous (single) headword template is used for cross-checks.
    headword_template = None
    if len(headword_templates) > 1:
        pagemsg("WARNING: Multiple old-style headword templates, not sure which one to use, using none")
        for ht in headword_templates:
            pagemsg("Ignored headword template: %s" % unicode(ht))
    elif not headword_templates:
        pagemsg("WARNING: No old-style headword templates")
    else:
        headword_template = headword_templates[0]
        pagemsg("Found headword template: %s" % unicode(headword_template))

    num_z_decl = 0
    for t in parsed.filter_templates():
        if unicode(t.name) != "ru-decl-noun-z":
            continue
        num_z_decl += 1
        pagemsg("Found z-decl template: %s" % unicode(t))
        ru_noun_table_template = runounlib.convert_zdecl_to_ru_noun_table(
            t, subpagetitle, pagemsg, headword_template=headword_template)
        if not ru_noun_table_template:
            pagemsg("WARNING: Unable to convert z-decl template: %s" % unicode(t))
            continue

        if headword_template:
            generate_template = re.sub(r"^\{\{ru-noun-table", "{{ru-generate-noun-args",
                                       unicode(ru_noun_table_template))
            if unicode(headword_template.name) == "ru-proper noun":
                generate_template = re.sub(r"\}\}$", "|ndef=sg}}", generate_template)

            def pagemsg_with_proposed(text):
                pagemsg("Proposed ru-noun-table template: %s" % unicode(ru_noun_table_template))
                pagemsg(text)

            generate_result = expand_text(unicode(generate_template))
            if not generate_result:
                pagemsg_with_proposed("WARNING: Error generating noun args, skipping")
                continue
            # Kept in a local name: assigning to `args` here would overwrite
            # the module-level `args` that expand_text() reads for `.verbose`.
            generate_args = blib.split_generate_args(generate_result)

            # This will check number mismatch and animacy mismatch
            new_genders = runounlib.check_old_noun_headword_forms(
                headword_template, generate_args, subpagetitle,
                pagemsg_with_proposed)
            if new_genders is None:
                continue

        # Convert the template in place: rename it and swap in the new params.
        origt = unicode(t)
        t.name = "ru-noun-table"
        del t.params[:]
        for param in ru_noun_table_template.params:
            t.add(param.name, param.value)
        pagemsg("Replacing z-decl %s with regular decl %s" % (origt, unicode(t)))

    if num_z_decl > 1:
        pagemsg("WARNING: Found multiple z-decl templates (%s)" % num_z_decl)

    return unicode(parsed), "Replace ru-decl-noun-z with ru-noun-table"
def find_decl_args(lemma, infl, wordind):
    """Work out declension arguments for one word by inspecting the lemma's page.

    The page named `lemma` is fetched and scanned for declension templates
    (ru-noun-table, ru-decl-adj), old-style headword templates (ru-noun,
    ru-proper noun) and z-style declension templates (ru-decl-noun-z).

    Parameters:
      lemma: title of the page to look up; also used as the link target.
      infl: the inflected form to display; compared against `lemma` after
        accent removal to decide between "[[infl]]" and "[[lemma|infl]]".
      wordind: index of the word, used only in log messages.

    Returns:
      None when the word has to be skipped (a warning is logged through
      `pagemsg` in every such case), otherwise a 4-tuple
      (params, flag, None, None) where `params` is a list of
      (name, value) pairs. In the code visible here `flag` is True for the
      adjectival results and False for the indeclinable result, and the last
      two members are always None.

    Relies on names provided by the enclosing scope/module: `pagemsg`,
    `pagetitle`, `subpagetitle`, `site`, `is_short_adj`, `use_given_decl`,
    `use_given_page_decl` and `getparam`.
    """
    declpage = pywikibot.Page(site, lemma)
    if rulib.remove_accents(infl) == lemma:
        wordlink = "[[%s]]" % infl
    else:
        wordlink = "[[%s|%s]]" % (lemma, infl)

    if not declpage.exists():
        # No page to consult: fall back on the adjectival endings or the
        # explicit short-adjective list.
        if lemma in is_short_adj or re.search(u"(ий|ый|ой)$", lemma):
            pagemsg("WARNING: Page doesn't exist, assuming word #%s adjectival: lemma=%s, infl=%s" % (wordind, lemma, infl))
            return [("1", wordlink), ("2", "+")], True, None, None
        pagemsg("WARNING: Page doesn't exist, can't locate decl for word #%s, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
        return None

    parsed = blib.parse_text(declpage.text)
    decl_templates = []
    headword_templates = []
    decl_z_templates = []
    for t in parsed.filter_templates():
        tname = unicode(t.name)
        if tname in ["ru-noun-table", "ru-decl-adj"]:
            pagemsg("find_decl_args: Found decl template: %s" % unicode(t))
            decl_templates.append(t)
        if tname in ["ru-noun", "ru-proper noun"]:
            pagemsg("find_decl_args: Found headword template: %s" % unicode(t))
            headword_templates.append(t)
        if tname in ["ru-decl-noun-z"]:
            pagemsg("find_decl_args: Found z-decl template: %s" % unicode(t))
            decl_z_templates.append(t)

    if not decl_templates:
        if decl_z_templates:
            # {{ru-decl-noun-z|звезда́|f-in|d|ё}}
            # {{ru-decl-noun-z|ёж|m-inan|b}}
            if len(decl_z_templates) > 1:
                pagemsg("WARNING: Multiple decl-z templates during decl lookup for word #%s, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
                return None
            decl_z_template = decl_z_templates[0]
            headword_template = None
            pagemsg("find_decl_args: Using z-decl template: %s" % unicode(decl_z_template))
            # Only an unambiguous headword template is handed to the converter.
            if not headword_templates:
                pagemsg("WARNING: find_decl_args: No headword templates for use with z-decl template conversion during decl lookup for word #%s: lemma=%s, infl=%s, zdecl=%s" % (wordind, lemma, infl, unicode(decl_z_template)))
            elif len(headword_templates) > 1:
                pagemsg("WARNING: find_decl_args: Multiple headword templates for use with z-decl template conversion during decl lookup for word #%s, ignoring: lemma=%s, infl=%s, zdecl=%s" % (wordind, lemma, infl, unicode(decl_z_template)))
            else:
                headword_template = headword_templates[0]
                pagemsg("find_decl_args: For word #%s, lemma=%s, infl=%s, using headword template %s for use with z-decl template %s" % (wordind, lemma, infl, unicode(headword_template), unicode(decl_z_template)))
            decl_template = runounlib.convert_zdecl_to_ru_noun_table(
                decl_z_template, subpagetitle, pagemsg,
                headword_template=headword_template)
            if not decl_template:
                # The converter can fail and hand back a falsy value; without
                # this guard the `.name` access further down would raise.
                pagemsg("WARNING: Unable to convert z-decl template during decl lookup for word #%s, skipping: lemma=%s, infl=%s, zdecl=%s" % (wordind, lemma, infl, unicode(decl_z_template)))
                return None
            decl_templates = [decl_template]
        elif ("[[Category:Russian indeclinable nouns]]" in declpage.text or
              any(getparam(x, "3") == "-" for x in headword_templates)):
            return [("1", wordlink), ("2", "$")], False, None, None
        else:
            pagemsg("WARNING: No decl template during decl lookup for word #%s, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
            return None

    if len(decl_templates) == 1:
        decl_template = decl_templates[0]
    else:
        # Multiple decl templates: prefer an adjectival one when the lemma
        # looks adjectival, otherwise require an explicit override.
        for t in decl_templates:
            if unicode(t.name) == "ru-decl-adj" and re.search(u"(ий|ый|ой)$", lemma):
                pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s, assuming adjectival: lemma=%s, infl=%s" % (wordind, lemma, infl))
                decl_template = t
                break
        else:
            if lemma in use_given_decl:
                overriding_decl = use_given_decl[lemma]
                pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, using overriding declension %s: lemma=%s, infl=%s" % (wordind, overriding_decl, lemma, infl))
                decl_template = blib.parse_text(overriding_decl).filter_templates()[0]
            elif pagetitle in use_given_page_decl:
                overriding_decl = use_given_page_decl[pagetitle].get(lemma, None)
                if not overriding_decl:
                    pagemsg("WARNING: Missing entry for ambiguous-decl lemma for word #%s, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
                    return None
                pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, using overriding declension %s: lemma=%s, infl=%s" % (wordind, overriding_decl, lemma, infl))
                decl_template = blib.parse_text(overriding_decl).filter_templates()[0]
            else:
                pagemsg("WARNING: Multiple decl templates during decl lookup for word #%s and not adjectival, skipping: lemma=%s, infl=%s" % (wordind, lemma, infl))
                return None

    pagemsg("find_decl_args: Using decl template: %s" % unicode(decl_template))
    if unicode(decl_template.name) == "ru-decl-adj":
        # Same pattern text as the raw literal ur"\bь\b"; written with escaped
        # backslashes because the `ur` prefix is Python 2-only syntax.
        if re.search(u"\\bь\\b", getparam(decl_template, "2"), re.U):
            return [("1", wordlink), ("2", u"+ь")], True, None, None
        return [("1", wordlink), ("2", "+")], True, None, None
    # NOTE(review): any other template type falls through here and the
    # function returns None implicitly -- confirm whether handling for
    # ru-noun-table was meant to follow.