def expand_text(tempcall):
    """Expand ``tempcall`` through ``blib.expand_text``, memoizing the result.

    Results are stored in ``expand_text_cache`` under the key
    ``(tempcall, lemma_pagetitle)``. When ``args.verbose`` is set, a message
    is emitted through ``pagemsg`` saying whether the lookup hit or missed.
    Whatever ``blib.expand_text`` returns is cached as-is and returned.
    """
    key = (tempcall, lemma_pagetitle)

    # Cache miss: do the real expansion, remember it and hand it back.
    if key not in expand_text_cache:
        if args.verbose:
            pagemsg("Couldn't find (%s, %s) in expand_text_cache" %
                    (tempcall, lemma_pagetitle))
        expanded = blib.expand_text(tempcall, lemma_pagetitle, pagemsg,
                                    args.verbose)
        expand_text_cache[key] = expanded
        return expanded

    # Cache hit.
    cached = expand_text_cache[key]
    if args.verbose:
        pagemsg("Found (%s, %s)=%s in expand_text_cache" %
                (tempcall, lemma_pagetitle, cached))
    return cached
def expand_text(tempcall):
    """Forward ``tempcall`` to ``blib.expand_text`` for the current page.

    The call is made with ``pagetitle``, the ``pagemsg`` callback and the
    ``args.verbose`` flag taken from the enclosing scope; the result of
    ``blib.expand_text`` is returned unchanged.
    """
    expanded = blib.expand_text(tempcall, pagetitle, pagemsg, args.verbose)
    return expanded
def expand_text(tempcall):
    """Forward ``tempcall`` to ``blib.expand_text`` for the current page.

    ``pagetitle``, ``pagemsg`` and ``verbose`` come from the enclosing scope;
    the result of ``blib.expand_text`` is returned unchanged.
    """
    expanded = blib.expand_text(tempcall, pagetitle, pagemsg, verbose)
    return expanded
def expand_text(tempcall):
    """Forward ``tempcall`` to ``blib.expand_text`` using ``pagename``.

    The fourth argument is ``semi_verbose`` (not ``verbose``); it, together
    with ``pagename`` and ``pagemsg``, is taken from the enclosing scope. The
    result of ``blib.expand_text`` is returned unchanged.
    """
    expanded = blib.expand_text(tempcall, pagename, pagemsg, semi_verbose)
    return expanded
def expand_text(tempcall):
    """Forward ``tempcall`` to ``blib.expand_text`` using ``pagename``.

    The fourth argument is always the literal ``False``; ``pagename`` and
    ``pagemsg`` come from the enclosing scope. The result of
    ``blib.expand_text`` is returned unchanged.
    """
    expanded = blib.expand_text(tempcall, pagename, pagemsg, False)
    return expanded
def expand_text(tempcall):
    """Forward ``tempcall`` to ``blib.expand_text`` with a dummy page name.

    ``pagemsg`` and ``verbose`` come from the enclosing scope; the result of
    ``blib.expand_text`` is returned unchanged.
    """
    # The page name doesn't matter when we call {{xlit}}, so a fixed
    # placeholder is passed instead of a real title.
    expanded = blib.expand_text(tempcall, "foo bar", pagemsg, verbose)
    return expanded
def process_page(index, page, save, verbose, adverbs, all_derived_lemmas):
    """Report candidate suffixed derivatives of this page that lack an etymology.

    The page title is stripped of each matching ending from a fixed list to
    obtain stems; each stem (plus variants produced by helper functions) is
    combined with a fixed set of suffixes to form candidate titles. For every
    candidate that is present in ``all_derived_lemmas`` and whose Russian
    section does not contain "==Etymology", one line is emitted through
    ``msg`` listing the derived lemmas, the base lemmas, the matching
    suffix(es), optional warnings, and the base and derived definitions.

    Parameters:
      index: value interpolated into the "Page %s %s: %s" message prefix.
      page: object providing ``title()`` and ``text``.
      save: not referenced anywhere in this function body.
      verbose: passed through to ``blib.expand_text``.
      adverbs: when truthy, the suffixes -о, -е, -и are also tried.
      all_derived_lemmas: container tested with ``in`` against each
        candidate title.

    Returns:
      None in every path; results are reported only through ``msg`` /
      ``errandmsg``.

    NOTE(review): this block was reformatted from a whitespace-collapsed
    source, so the nesting shown here is a reconstruction -- confirm against
    the original file.
    """
    pagetitle = unicode(page.title())

    # Message helpers that prefix output with the page index and title.
    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    # ending and whether final consonant is palatal
    endings = [
        (u"ывать", False),
        (u"ивать", False),
        (u"ать", False),
        (u"ять", True),
        (u"еть", True),
        (u"ить", True),
        (u"нуть", False),
        (u"ия", True),
        (u"ие", True),
        (u"я", True),
        (u"е", True),
        (u"ь", True),
        (u"и", True),
        (u"а", False),
        (u"о", False),
        (u"ы", False),
        (u"ый", False),
        (u"ий", True),
        (u"ой", False),
    ]
    # Every ending that matches contributes a stem (there is no break), so a
    # title may yield several stems, e.g. one per overlapping ending.
    stems = []
    for ending, is_palatal in endings:
        if pagetitle.endswith(ending):
            stem = re.sub(ending + "$", "", pagetitle)
            stems.append((stem, is_palatal))
    # No ending matched: fall back to the whole title as the only stem.
    if not stems:
        stems.append((pagetitle, False))
    # List of (candidate title, suffix) pairs built up below.
    possible = []

    def append_possible(stem_to_try, suffix):
        # The stem is lowercased before the suffix is attached.
        possible.append((stem_to_try.lower() + suffix, suffix))

    # Try -ный/-ной, -ка, -ко
    # (the `palatal` flag is unpacked in these loops but never read)
    for stem, palatal in stems:
        stems_to_try = []

        def frob(stem):
            # Apply first_palatalization(), then add a soft sign after a
            # final "л" and a "й" after a final vowel (per rulib.vowel).
            stem = first_palatalization(stem)
            if stem.endswith(u"л"):
                stem += u"ь"
            if re.search("[" + rulib.vowel + "]$", stem):
                stem += u"й"
            return stem

        to_try_1 = frob(stem)
        # Two dereduced variants of the stem (second argument False / True);
        # each is used only if rulib.dereduce_stem() returned something truthy.
        to_try_2 = rulib.dereduce_stem(stem, False)
        if to_try_2:
            to_try_2 = frob(rulib.remove_accents(to_try_2))
        to_try_3 = rulib.dereduce_stem(stem, True)
        if to_try_3:
            to_try_3 = frob(rulib.remove_accents(to_try_3))
        stems_to_try.append(to_try_1)
        if to_try_2:
            stems_to_try.append(to_try_2)
        # Skip the third variant when it duplicates the second.
        if to_try_3 and to_try_3 != to_try_2:
            stems_to_try.append(to_try_3)
        for stem_to_try in stems_to_try:
            append_possible(stem_to_try, u"ный")
            append_possible(stem_to_try, u"ной")
            append_possible(stem_to_try, u"ский")
            append_possible(stem_to_try, u"ской")
            append_possible(stem_to_try, u"ник")
            append_possible(stem_to_try, u"чик")
            append_possible(stem_to_try, u"щик")
            append_possible(stem_to_try, u"ка")
            append_possible(stem_to_try,
                            u"ко")
            append_possible(stem_to_try, u"ство")

    # Try -овый/-евый/-ёвый/-овой/-евой, -ик, -ок/-ек/-ёк
    for stem, palatal in stems:
        stems_to_try = []
        stems_to_try.append(stem)
        reduced = rulib.reduce_stem(stem)
        if reduced:
            stems_to_try.append(reduced)
        for stem_to_try in stems_to_try:
            # Drop a final "й" before attaching these suffixes.
            if stem_to_try.endswith(u"й"):
                stem_to_try = stem_to_try[:-1]
            append_possible(stem_to_try, u"овый")
            append_possible(stem_to_try, u"евый")
            append_possible(stem_to_try, u"ёвый")
            append_possible(stem_to_try, u"овой")
            append_possible(stem_to_try, u"евой")
            # The remaining suffixes are attached to the palatalized stem.
            stem_to_try = first_palatalization(stem_to_try)
            append_possible(stem_to_try, u"еский")
            append_possible(stem_to_try, u"ический")
            append_possible(stem_to_try, u"ество")
            append_possible(stem_to_try, u"ик")
            append_possible(stem_to_try, u"ок")
            append_possible(stem_to_try, u"ек")
            append_possible(stem_to_try, u"ёк")
            append_possible(stem_to_try, u"ец")

    # If derived adverbs, try -о, -е, -и
    if adverbs:
        for stem, palatal in stems:
            stems_to_try = []
            stems_to_try.append(stem)
            for stem_to_try in stems_to_try:
                append_possible(stem_to_try, u"о")
                append_possible(stem_to_try, u"е")
                append_possible(stem_to_try, u"и")

    # Bail out before reading the page text if no candidate is a known
    # derived lemma.
    would_output = False
    for possible_derived, suffix in possible:
        if possible_derived in all_derived_lemmas:
            would_output = True
    if not would_output:
        return

    text = unicode(page.text)

    # Stop if rulib.check_for_alt_yo_terms() returns a truthy value for this
    # page text.
    if rulib.check_for_alt_yo_terms(text, pagemsg):
        return

    # Filled in the first time a candidate gets far enough to need it.
    base_lemmas = []

    for possible_derived, suffix in possible:
        if possible_derived in all_derived_lemmas:
            derived_section = blib.find_lang_section(possible_derived,
                                                     "Russian", pagemsg,
                                                     errandpagemsg)
            if not derived_section:
                errandpagemsg(
                    "WARNING: Couldn't find Russian section for derived term %s"
                    % possible_derived)
                continue
            if "==Etymology" in derived_section:
                pagemsg(
                    "Skipping derived term %s because it already has an etymology"
                    % possible_derived)
                continue
            derived_defns = rulib.find_defns(derived_section)
            if not derived_defns:
                errandpagemsg(
                    "WARNING: Couldn't find definitions for derived term %s" %
                    possible_derived)
                continue

            derived_parsed = blib.parse_text(derived_section)
            # Template expansion for the derived page uses the derived title,
            # not this page's title, hence the dedicated lambda.
            derived_lemmas = find_noun_lemmas(
                derived_parsed, possible_derived, errandpagemsg,
                lambda tempcall: blib.expand_text(
                    tempcall, possible_derived, pagemsg, verbose))
            # Add headwords from adjective/adverb templates, written as
            # "lemma//translit" when any tr= parameters are present.
            for t in derived_parsed.filter_templates():
                if tname(t) in ["ru-adj", "ru-adv"]:
                    lemmas = blib.fetch_param_chain(t, "1", "head",
                                                    possible_derived)
                    trs = blib.fetch_param_chain(t, "tr", "tr")
                    if trs:
                        lemmas = [
                            "%s//%s" % (lemma, tr)
                            for lemma, tr in zip(lemmas, trs)
                        ]
                    for lemma in lemmas:
                        add_if_not(derived_lemmas, lemma)

            # NOTE(review): this returns from the whole function rather than
            # moving on to the next candidate -- confirm that is intended.
            if not derived_lemmas:
                errandpagemsg("WARNING: No derived term lemmas for %s" %
                              possible_derived)
                return

            # NOTE(review): the extent of this lazily-run block (through
            # base_defns) is reconstructed; `warnings` and `base_defns` are
            # read below on every iteration, so they must survive from the
            # first pass -- confirm nesting against the original file.
            if not base_lemmas:
                base_parsed = blib.parse_text(text)
                base_lemmas = find_noun_lemmas(base_parsed, pagetitle,
                                               errandpagemsg, expand_text)
                # Same headword collection as above, for verb/adjective
                # templates on the base page.
                for t in base_parsed.filter_templates():
                    if tname(t) in ["ru-verb", "ru-adj"]:
                        lemmas = blib.fetch_param_chain(
                            t, "1", "head", pagetitle)
                        trs = blib.fetch_param_chain(t, "tr", "tr")
                        if trs:
                            lemmas = [
                                "%s//%s" % (lemma, tr)
                                for lemma, tr in zip(lemmas, trs)
                            ]
                        for lemma in lemmas:
                            add_if_not(base_lemmas, lemma)

                if not base_lemmas:
                    errandpagemsg("WARNING: No base lemmas")
                    return

                base_lemmas = [
                    rulib.remove_monosyllabic_accents(x) for x in base_lemmas
                ]

                # Flags appended to the output line for manual review.
                warnings = []
                if len(base_lemmas) > 1:
                    warnings.append("multiple-lemmas")
                if any("//" in lemma for lemma in base_lemmas):
                    warnings.append("translit-in-lemma")

                base_section = blib.find_lang_section_from_text(
                    text, "Russian", pagemsg)
                if not base_section:
                    errandpagemsg(
                        "WARNING: Couldn't find Russian section for base")
                    return

                base_defns = rulib.find_defns(base_section)
                if not base_defns:
                    errandpagemsg(
                        "WARNING: Couldn't find definitions for base")
                    return

            def concat_defns(defns):
                # Join with ";", then rewrite literal "_" as backslash-u and
                # spaces as "_" so the result contains no spaces.
                return ";".join(defns).replace("_", r"\u").replace(" ", "_")

            # Collect whichever forms of the suffix (as given, and as returned
            # by the two rulib stress helpers) actually end a derived lemma.
            suffixes_with_stress = []
            for suf in [
                    suffix,
                    rulib.make_beginning_stressed_ru(suffix),
                    rulib.make_ending_stressed_ru(suffix)
            ]:
                for derived_lemma in derived_lemmas:
                    if derived_lemma.endswith(suf):
                        add_if_not(suffixes_with_stress, suf)

            # The conditional expression binds looser than %, so the
            # " WARNING:..." text is emitted only when warnings is non-empty.
            msg("%s %s+-%s%s no-etym possible-suffixed %s //// %s" %
                (",".join(derived_lemmas), ",".join(base_lemmas),
                 ",".join(suffixes_with_stress),
                 " WARNING:%s" % ",".join(warnings) if warnings else "",
                 concat_defns(base_defns), concat_defns(derived_defns)))
def expand_text(tempcall):
    """Forward ``tempcall`` to ``blib.expand_text`` using ``lemma``.

    ``lemma``, ``pagemsg`` and ``verbose`` come from the enclosing scope; the
    result of ``blib.expand_text`` is returned unchanged.
    """
    expanded = blib.expand_text(tempcall, lemma, pagemsg, verbose)
    return expanded
def expand_text(tempcall):
    """Forward ``tempcall`` to ``blib.expand_text`` using the macron-less lemma.

    The second argument is ``remove_macrons(lemma)``; ``lemma``, ``pagemsg``
    and ``verbose`` come from the enclosing scope. The result of
    ``blib.expand_text`` is returned unchanged.
    """
    plain_lemma = remove_macrons(lemma)
    return blib.expand_text(tempcall, plain_lemma, pagemsg, verbose)
def expand_text(t):
    """Forward ``t`` to ``blib.expand_text`` using ``lemma``.

    ``lemma``, ``pagemsg`` and ``verbose`` come from the enclosing scope; the
    result of ``blib.expand_text`` is returned unchanged.
    """
    expanded = blib.expand_text(t, lemma, pagemsg, verbose)
    return expanded
def expand_text(tempcall):
    """Forward ``tempcall`` to ``blib.expand_text`` using the macron-less lemma.

    The second argument is ``remove_macrons(lemma, preserve_diaeresis)``;
    ``lemma``, ``preserve_diaeresis``, ``pagemsg`` and ``verbose`` come from
    the enclosing scope. The result of ``blib.expand_text`` is returned
    unchanged.
    """
    plain_lemma = remove_macrons(lemma, preserve_diaeresis)
    return blib.expand_text(tempcall, plain_lemma, pagemsg, verbose)