Ejemplo n.º 1
0
def add_necessary_breathing(w):
    """Return `w` with a smooth breathing added to its first syllable if needed.

    The word is returned unchanged when its first syllable has a non-empty
    onset or when the nucleus of that syllable already carries a breathing.
    Otherwise the nucleus is rebuilt from its base form with ``SMOOTH``
    applied, followed by the accent the nucleus originally carried (if any).
    For a two-character nucleus (presumably a diphthong) the marks are placed
    on the second character and the first character is kept as-is.
    """
    syllables = syllabify(w)
    onset, nucleus, coda = onset_nucleus_coda(syllables[0])

    # Guard clause: only an onset-less, breathing-less first syllable is touched.
    if onset != "" or breathing(nucleus):
        return w

    original_accent = accent(nucleus)

    # Split the nucleus into an untouched lead and the part receiving the marks.
    if len(nucleus) == 2:
        lead, target = nucleus[0], nucleus[1]
    else:
        lead, target = "", nucleus

    marked = add_diacritic(base(target), SMOOTH)
    if original_accent:
        # The breathing is applied first, then the original accent on top.
        marked = add_diacritic(marked, original_accent)

    return onset + lead + marked + coda + "".join(syllables[1:])
Ejemplo n.º 2
0
def syllable_accent(s):
    """Return the first truthy accent found on a character of the nucleus of `s`.

    Characters are examined in the order ``nucleus(s)`` yields them, and the
    scan stops at the first one for which ``accent`` returns a truthy value.
    Returns ``None`` when no character of the nucleus has an accent.
    """
    # Both generators are lazy, so accent() is not called past the first hit.
    candidates = (accent(ch) for ch in nucleus(s))
    return next((found for found in candidates if found), None)
Ejemplo n.º 3
0
    def generate(self, lemma, parse, allow_form_override=True, context=None):
        """Generate the inflected form(s) of `lemma` for the given `parse`.

        Stems for the lemma come from ``self.regex_list``. Each stem is matched
        against the rules in ``stemming_rules[parse]``, which have the shape
        ``"s1|s2>s3<s4|s5"``: a rule applies when the stem ends with the
        accent-stripped ``s1 + s2``; ``s2`` is then removed from the stem and
        ``s3 + s5`` (or the lexicon's ending override) is appended. ``s4`` is
        not used here. Rules with an empty ``s1 + s2`` only apply when no
        other rule matched the stem. The resulting form is then accented.

        Args:
            lemma: Key into ``self.lexicon``.
            parse: Parse code. Used as the key into ``stemming_rules`` and
                into the lemma's "forms" and "endings" lexicon entries; the
                part before the first "." is the key into "accents".
            allow_form_override: When true, a truthy form listed for `parse`
                under the lemma's "forms" entry is returned as-is.
            context: Passed through to ``self.regex_list`` and to the
                recursive calls that build the "AAP.NSM" / "PAP.NSM"
                reference forms.

        Returns:
            The lexicon's form override if one applies; otherwise the
            generated forms joined with "/" after ``rebreath`` and duplicate
            removal; or ``None`` when the lemma is not in the lexicon, no
            stems are found, `parse` has no stemming rules, or a rule "ref"
            points at a missing entry.
        """
        stems = None
        accent_override = None
        is_enclitic = False
        ending_override = None

        if lemma in self.lexicon:
            entry = self.lexicon[lemma]

            if allow_form_override:
                answer = entry.get("forms", {}).get(parse)
                if answer:
                    return answer

            stems = self.regex_list(lemma, parse, context)

            if "." in parse:
                accents = entry.get("accents", {}).get(parse.split(".")[0])
                if accents == "enclitic":
                    is_enclitic = True
                else:
                    accent_override = accents

            ending_override = entry.get("endings", {}).get(parse)

        if stems is None or parse not in stemming_rules:
            return None

        # Follow {"ref": other_parse} indirections to the actual rule list.
        # This depends only on `parse`, so it is resolved once for all stems.
        pairs = stemming_rules[parse]
        while isinstance(pairs, dict) and "ref" in pairs:
            if pairs["ref"] not in stemming_rules:
                # @@@ raise error?
                return None
            pairs = stemming_rules[pairs["ref"]]

        # A lexicon ending override replaces the ending of every matching rule.
        if ending_override:
            override_endings = ending_override.split("/")
        else:
            override_endings = None

        # (parse, ending) combinations that are always accented as oxytone.
        oxytone_nsm = {
            ("AAP.NSM", "ων"),
            ("AAP.NSM", "_3+ς"),
            ("PAP.NSM", "_3+ς"),
        }

        answers = []
        for stem in stems.split("/"):
            stem = debreath(stem)
            base_endings = []
            default = []
            for rule in pairs:
                s1, s234, s5 = rule.split("|")
                s2, s34 = s234.split(">")
                s3, _ = s34.split("<")

                if not stem.endswith(strip_accents(s1 + s2)):
                    continue
                stem_base = stem[:-len(s2)] if s2 else stem

                if override_endings is not None:
                    ending_list = override_endings
                else:
                    ending_list = [s3 + s5]

                if s1 + s2:
                    base_endings.append((stem_base, ending_list))
                else:
                    default.append((stem_base, ending_list))

            # only use default if there are no other options
            if not base_endings:
                base_endings = default

            for stem_base, ending_list in base_endings:
                for ending in ending_list:
                    form = stem_base + ending
                    if accent(ending):
                        # The ending already carries its own accent.
                        answers.append(form.replace("|", ""))
                    elif is_enclitic:
                        answers.append(make_oxytone(form).replace("|", ""))
                    elif parse[2] == "P":
                        # Third parse character "P" (presumably participles).
                        if accent_override:
                            answers.append(persistent(form, accent_override))
                        elif (parse, ending) in oxytone_nsm:
                            answers.append(make_oxytone(form).replace("|", ""))
                        elif parse[0:3] == "AAP" and parse != "AAP.NSM":
                            # calculate NSM; it is the accent reference only
                            # when it ends in an accented ών / ούς, otherwise
                            # the lemma is used.
                            # NOTE: generate() can return None, in which case
                            # the split below raises AttributeError.
                            nsms = self.generate(lemma, "AAP.NSM", context=context)
                            for nsm in nsms.split("/"):
                                if nsm.endswith(("ών", "ούς")):
                                    answers.append(persistent(form, nsm))
                                else:
                                    answers.append(persistent(form, lemma))
                        elif parse[0:3] == "PAP" and parse != "PAP.NSM":
                            # calculate NSM with the same context as the form
                            # being generated, mirroring the AAP branch.
                            nsms = self.generate(lemma, "PAP.NSM", context=context)
                            for nsm in nsms.split("/"):
                                answers.append(persistent(form, strip_length(nsm)))
                        else:
                            answers.append(recessive(form, default_short=True))
                    elif parse[0:3] in ("AAN", "XAN", "XMN", "XPN") or (
                        parse[0:3] == "PAN" and stem.endswith("!")
                    ):
                        answers.append(on_penult(form, default_short=True))
                    else:
                        answers.append(recessive(form, default_short=True))

        return "/".join(remove_duplicates(rebreath(w) for w in answers))