def add_necessary_breathing(w):
    """Return *w* with a smooth breathing added to the first syllable's
    nucleus when that syllable has no onset and no breathing mark already;
    otherwise return *w* unchanged."""
    syllables = syllabify(w)
    onset, nuc, coda = onset_nucleus_coda(syllables[0])
    # Nothing to do if the syllable starts with a consonant or is already marked.
    if onset != "" or breathing(nuc):
        return w
    acc = accent(nuc)
    # For a two-character nucleus (diphthong) the diacritics go on the
    # second vowel; otherwise on the single vowel itself.
    if len(nuc) == 2:
        head, target = nuc[0], nuc[1]
    else:
        head, target = "", nuc
    marked = add_diacritic(base(target), SMOOTH)
    if acc:
        marked = add_diacritic(marked, acc)
    return onset + head + marked + coda + "".join(syllables[1:])
def syllable_accent(s):
    """Return the accent mark found on the nucleus of syllable *s*,
    or None if the nucleus carries no accent."""
    return next((mark for mark in map(accent, nucleus(s)) if mark), None)
def generate(self, lemma, parse, allow_form_override=True, context=None):
    """Generate the inflected form(s) of *lemma* for morphological tag *parse*.

    Returns a "/"-joined string of candidate forms, an explicit lexicon
    override form, or None when no stems / no stemming rules apply.

    parse is a tag like "AAP.NSM" (tense-voice-mood, then ".", then
    case-number-gender for participles); parse[2] == "P" marks a participle.
    """
    answers = []
    stems = None
    accent_override = None
    is_enclitic = False
    ending_override = None
    if lemma in self.lexicon:
        # An exact form listed in the lexicon short-circuits generation.
        if allow_form_override:
            answer = self.lexicon[lemma].get("forms", {}).get(parse)
            if answer:
                return answer
        stems = self.regex_list(lemma, parse, context)
        if "." in parse:
            # Accent behaviour may be overridden per tense-voice-mood prefix.
            accents = self.lexicon[lemma].get("accents", {}).get(parse.split(".")[0])
            if accents == "enclitic":
                is_enclitic = True
            else:
                accent_override = accents
        ending_override = self.lexicon[lemma].get("endings", {}).get(parse)
    if stems is None:
        return
    else:
        # Multiple alternative stems are "/"-separated in the lexicon.
        stems = stems.split("/")
    if parse not in stemming_rules:
        return
    for stem in stems:
        stem = debreath(stem)
        pairs = stemming_rules[parse]
        # Follow "ref" indirections until we reach a concrete rule list.
        while isinstance(pairs, dict) and "ref" in pairs:
            if pairs["ref"] in stemming_rules:
                pairs = stemming_rules[pairs["ref"]]
            else:
                # @@@ raise error?  (dangling ref: silently produce nothing)
                return
        base_endings = []
        default = []
        for rule in pairs:
            # Rule format: "s1|s2>s3<s4|s5" — s1+s2 is the stem suffix to
            # match/strip, s3+s5 the ending to attach.
            s1, s234, s5 = rule.split("|")
            s2, s34 = s234.split(">")
            s3, s4 = s34.split("<")
            if stem.endswith(strip_accents(s1 + s2)):
                if s2:
                    base = stem[:-len(s2)]
                else:
                    base = stem
            else:
                continue
            if ending_override:
                ending_list = ending_override.split("/")
            else:
                ending_list = [s3 + s5]
            if s1 + s2:
                base_endings.append((base, ending_list))
            else:
                default.append((base, ending_list))
        # only use default (empty-suffix rules) if there are no other options
        if len(base_endings) == 0 and len(default) > 0:
            base_endings = default
        for base, ending_list in base_endings:
            for ending in ending_list:
                if accent(ending):
                    # Ending carries its own accent; just join and clean "|".
                    answers.append((base + ending).replace("|", ""))
                elif is_enclitic:
                    answers.append(make_oxytone(base + ending).replace("|", ""))
                else:
                    if parse[2] == "P":
                        # Participles: persistent accent, with special cases.
                        if accent_override:
                            answers.append(persistent(base + ending, accent_override))
                        elif parse == "AAP.NSM" and ending == "ων":
                            answers.append(make_oxytone(base + ending).replace("|", ""))
                        elif parse == "AAP.NSM" and ending == "_3+ς":
                            answers.append(make_oxytone(base + ending).replace("|", ""))
                        elif parse == "PAP.NSM" and ending == "_3+ς":
                            answers.append(make_oxytone(base + ending).replace("|", ""))
                        elif parse[0:3] == "AAP" and parse != "AAP.NSM":
                            # calculate NSM first; its accent position is the
                            # anchor for the rest of the aorist paradigm.
                            # NOTE(review): assumes the recursive call returns a
                            # string (not None) — confirm for all lexicon entries.
                            nsms = self.generate(lemma, "AAP.NSM", context=context)
                            nsms = nsms.split("/")
                            for nsm in nsms:
                                if nsm.endswith(("ών", "ούς")):
                                    answers.append(persistent(base + ending, nsm))
                                else:
                                    answers.append(persistent(base + ending, lemma))
                        elif parse[0:3] == "PAP" and parse != "PAP.NSM":
                            # calculate NSM (present participle anchor)
                            nsms = self.generate(lemma, "PAP.NSM").split("/")
                            for nsm in nsms:
                                nsm = strip_length(nsm)
                                answers.append(persistent(base + ending, nsm))
                        else:
                            answers.append(recessive(base + ending, default_short=True))
                    elif parse[0:3] in ["AAN", "XAN", "XMN", "XPN"]:
                        # These infinitives accent the penult.
                        answers.append(on_penult(base + ending, default_short=True))
                    elif parse[0:3] == "PAN" and stem.endswith("!"):
                        answers.append(on_penult(base + ending, default_short=True))
                    else:
                        # Default: recessive accentuation.
                        answers.append(recessive(base + ending, default_short=True))
    return "/".join(remove_duplicates(rebreath(w) for w in answers))