コード例 #1
0
def create_present_passive_participle(_, root, pres_conjugation):
    """
    Build the present passive participle(s) for a verb root.

    :param _: ignored positional argument, kept so the signature stays compatible with callers
    :param root: present root of the verb; diacritics are stripped before the endings are attached
        (except for 'con2d_pass', which uses the root as given)
    :param pres_conjugation: conjugation label (e.g. 'con1_act', 'con2a_pass') selecting the endings
    :return: candidate participles that occur in ``greek_corpus``, joined with ','; '' if none do
    """
    bare_root = remove_all_diacritics(root)

    # each entry: conjugation labels -> endings attached to the unaccented root
    endings_by_conjugation = (
        (('con1_act', 'con1_pass'), ('όμενος',)),
        (('con2a_act', 'con2ab_pass', 'con2a_pass'), ('ώμενος', 'ούμενος')),
        (('con2c_act',), ('γόμενος',)),
        (('con2b_act', 'con2b_pass', 'con2c_pass', 'con2d_act'), ('ούμενος',)),
        (('con2e_pass',), ('άμενος',)),
    )

    candidates = []
    for conjugations, endings in endings_by_conjugation:
        if pres_conjugation in conjugations:
            candidates = [bare_root + ending for ending in endings]
            break
    else:
        if pres_conjugation == 'con2d_pass':
            # this type keeps the original root and re-accents the whole form
            candidates = [put_accent_on_the_antepenultimate(root + 'μενος')]

    # special case for xairomai
    if bare_root == 'χαιρ':
        candidates = ['χαρούμενος']

    return ','.join(form for form in candidates if form in greek_corpus)
コード例 #2
0
def put_accent_on_antepenultimate_in_all_forms(masc, forms):
    """
    Move the accent of every form to the antepenultimate when ``masc`` itself is accented there.

    :param masc: reference form whose accent position decides whether anything is changed
    :param forms: nested mapping number -> gender -> case -> form; it is modified in place
    :return: the same ``forms`` object
    """
    if where_is_accent(masc) != 'antepenultimate':
        return forms

    for by_gender in forms.values():
        for by_case in by_gender.values():
            # only existing keys are reassigned, so iterating the mapping meanwhile is safe
            for case in by_case:
                by_case[case] = put_accent_on_the_antepenultimate(by_case[case])
    return forms
コード例 #3
0
def create_roots_from_past(verb, lemma):
    """
    Derive the de-augmented root from a past form.

    :param verb: past form, expected in the 1st person singular (ending in 'α')
    :param lemma: lemma of the verb, passed on to ``deaugment_stem``
    :return: the de-augmented stem whose form in 'αμε' (accented on the antepenultimate) occurs in
        ``greek_corpus``; the plain de-augmented stem is preferred over the prefixed one. None when ``verb``
        is empty, does not end in 'α', or neither candidate is attested.
    """
    # argument only in 1st person; an empty string is rejected the same way instead of raising IndexError
    if not verb or verb[-1] != 'α':
        return None

    stem = verb[:-1]
    deaugmented_stem = deaugment_stem(stem, lemma)
    deaugmented_stem_prefixed = deaugment_prefixed_stem(stem)

    if deaugmented_stem and put_accent_on_the_antepenultimate(
            deaugmented_stem + 'αμε') in greek_corpus:
        return deaugmented_stem

    if deaugmented_stem_prefixed and put_accent_on_the_antepenultimate(
            deaugmented_stem_prefixed + 'αμε') in greek_corpus:
        return deaugmented_stem_prefixed

    return None
コード例 #4
0
def create_all_basic_adj_forms(adj, inflection=None):
    """
    Create the basic forms of an adjective: its three genders, degrees of comparison and adverbs.

    Candidate forms are derived from the ending of ``adj`` and, in most branches, validated against
    ``greek_corpus``.

    :param inflection: if relevant, add 'aklito' flag if it is certain does not have any declination (like μωβ)
    :param adj: masc nom sg form (`ωραίος`)
    :return: dictionary with keys:
    'adj': masc, fem, neut forms as a string divided with / ('ωραίος/ωραία/ωραίο') if alternatives, they are added and
    separated with a comma
    'comparative': if exists in form parathetiko + ',' + alt_parathetiko + '/' + uperthetiko + ',' + alt_uperthetiko with
    form only in masc sing nom
    'adverb': adverb form, if alternatives, then separated with comma
    'adverb_comparative': if exists, adverb_parathetiko + ',' + alt_adverb_parathetiko + '/' + adverb_uperthetiko + ',' + alt_adverb_uperthetiko
    :raises NotLegalAdjectiveException: for a form in -είς without a matching -έντα form in the corpus, and for a
    form in -ας/-άς that fits none of the handled patterns
    """

    # The input is sometimes not the masc nom sg; when the corpus knows a matching form, switch to it
    # (NOTE(review): presumably these are lemmas listed under their neuter/feminine form - confirm).
    if adj[-2:] == 'ον' and adj + 'τα' in greek_corpus:
        adj = adj[:-2] + 'ων'

    elif adj[-2:] == 'ές' and adj[:-2] + 'ής' in greek_corpus:
        #  ['εκκρεμές', 'λυκαυγές', 'αλκαλοειδές']:
        adj = adj[:-2] + 'ής'
    elif adj[-2:] == 'έν' and adj[:-2] + 'είς' in greek_corpus:
        # ['ανακοινωθέν']:
        adj = adj[:-2] + 'είς'
    elif adj[-2:] == 'ού':
        if adj[:-2] + 'άς' in greek_corpus:
            adj = adj[:-2] + 'άς'
        elif put_accent_on_the_penultimate(adj[:-2] + 'ης') in greek_corpus:
            adj = put_accent_on_the_penultimate(adj[:-2] + 'ης')
    elif adj[-1] == 'ί' and adj[:-1] + 'ής' in greek_corpus:
        adj = adj[:-1] + 'ής'
    accent = where_is_accent(adj, true_syllabification=False)

    adj_temp = {'adj': 'masc,fem,neuter', 'comparative': '', 'adverb': '', 'adverb_comparative': ''}

    # most basic case -os

    if adj[-2:] in ['ός', 'ος']:
        masc = adj

        if accent == 'ultimate':
            fem = adj[:-2] + 'ή'
        else:
            fem = adj[:-2] + 'η'

        # a vowel before the ending switches the feminine from -η to -α
        if adj[-3] in vowels and count_syllables(adj) <= 2:
            if accent == 'ultimate':
                fem = adj[:-2] + 'ά'
            else:
                fem = adj[:-2] + 'α'

        elif adj[-3] in vowels and count_syllables(adj) > 2 and not is_accented(modern_greek_syllabify(adj)[-3]):
            if accent == 'ultimate':
                fem = adj[:-2] + 'ά'
            else:
                fem = adj[:-2] + 'α'

        if adj[-3] in ['κ', 'θ', 'χ']:
            # after these consonants a feminine in -ια is possible; let the corpus decide

            if accent == 'ultimate':
                fem_alt = adj[:-2] + 'ιά'
            else:
                fem_alt = adj[:-2] + 'ια'

            if fem in greek_corpus and fem_alt in greek_corpus:
                fem = fem + ',' + fem_alt

            elif fem not in greek_corpus and fem_alt in greek_corpus:
                fem = fem_alt

            elif fem not in greek_corpus:
                # neither form is attested
                # for the most part forms on h should be correct, but adj is not very common, so is lacking from db
                # check for -a by looking for genitive on as in db
                if accent == 'ultimate':
                    gen = adj[:-2] + 'άς'
                    beta_fem = adj[:-2] + 'ά'
                else:
                    gen = adj[:-2] + 'ας'
                    beta_fem = adj[:-2] + 'α'

                if gen in greek_corpus:
                    fem = beta_fem

            # if it's lacking from the db, still the best guess is to leave the form on -h

        neuter = adj[:-1]

    elif adj[-2:] in ['ής', 'ης']:

        # first check which type
        stem = adj[:-2]
        if stem + 'ικο' in greek_corpus:
            # type hs, a, iko, here accent is always on the last syllable of the stem
            masc = adj
            fem = stem + 'α'
            if stem + 'ισσα' in greek_corpus:
                fem = stem + 'ισσα'
            neuter = stem + 'ικο'

        elif where_is_accent(adj) == 'ultimate' and (stem + 'ί' in greek_corpus or stem + 'ιά' in greek_corpus):
            # type, hs, ia, i, mostly colors
            # (the lookup uses the monotonic 'ί'; the original polytonic iota with varia was most likely a typo)

            masc = adj
            fem = put_accent(stem + 'ια', accent)
            neuter = put_accent(stem + 'ι', accent)

        elif put_accent(stem + 'ους', accent, true_syllabification=False) in greek_corpus:
            # type hs, hs, es
            masc, fem = adj, adj
            neuter = put_accent(stem + 'ες', accent, true_syllabification=False)
            if accent != 'ultimate' and neuter not in greek_corpus:
                neuter = put_accent(stem + 'ες', 'antepenultimate', true_syllabification=False)

        elif stem + 'ού' in greek_corpus:
            # type kafetzhs kafetzou, but is it a adj?
            masc = adj
            fem = adj[:-2] + 'ού'
            neuter = adj[:-1] + 'δικο'

        else:
            # In cases where my corpus cannot help me, I will surmise that it's hs, a (or issa), iko

            if accent == 'penultimate':
                if adj.endswith('ώδης'):
                    masc, fem = adj, adj
                    neuter = stem + 'ες'
                else:
                    masc = adj
                    fem = stem + 'α'
                    if stem + 'ισσα' in greek_corpus:
                        fem = stem + 'ισσα'
                    neuter = stem + 'ικο'
            elif accent == 'ultimate':
                masc, fem = adj, adj
                neuter = stem + 'ές'
            else:
                # any other accent value used to leave the three forms unbound (UnboundLocalError below);
                # fall back to the same catch-all as for unrecognised endings
                masc, fem, neuter = adj, adj, adj

    elif adj[-3:] == 'ους':
        masc, fem = adj, adj
        neuter = adj[:-1] + 'ν'

    elif adj[-2:] in ['υς', 'ύς'] or adj in ['γλυκύ']:
        # my database is unfortunately not that great...
        stem = adj[:-2]
        masc = adj
        neuter = adj[:-1]
        if adj in ['γλυκύ']:
            # unfortunately there are some mistakes in my word list wherever forms are given as lemma
            # and so I have to correct them in this way
            stem = adj[:-1]
            masc = adj + 'ς'
            neuter = adj

        fem = stem + 'ιά'

        if fem + 'ς' not in greek_corpus:
            # look for gen because nom fem can be mistaken for acc pl
            fem_eia = stem + 'εία'
            if fem_eia in greek_corpus:
                fem = fem_eia
            if adj[-5:] == 'πολύς':
                fem = adj[:-5] + 'πολλή'

    elif adj[-2:] in ['ων', 'ών']:
        stem = adj[:-2]
        masc = adj
        fem = None
        neuter = None
        if accent == 'penultimate' or not accent:
            fem = stem + 'ουσα'

            neuter = stem + 'ον'

        if accent == 'ultimate' or not accent:
            fem = stem + 'ούσα'
            neuter = stem + 'ούν'
            neuter_alt_1 = stem + 'ών'
            neuter_alt_2 = stem + 'ούν'
            if neuter + 'τα' in greek_corpus or neuter + 'τες' in greek_corpus:
                fem = stem + 'ούσα'
            elif neuter_alt_1 + 'τα' in greek_corpus or neuter_alt_1 + 'τες' in greek_corpus or adj in ['ζων',
                                                                                                        'κυβερνών',
                                                                                                        'επιζών']:

                fem = stem + 'ώσα'
                neuter = neuter_alt_1
            elif neuter_alt_2 + 'τα' in greek_corpus or neuter_alt_2 + 'τες' in greek_corpus or neuter_alt_2 + 'των' in greek_corpus:
                fem = stem + 'ούσα'
                neuter = neuter_alt_2
            if not accent:
                neuter = remove_all_diacritics(neuter)

        # it is also possible, that there are wn, onos
        if adj[:-2] + 'ονος' in greek_corpus:
            masc, fem = adj, adj
            neuter = adj[:-2] + 'ον'

        if fem is None:
            # no pattern applied (unexpected accent position); avoid failing on None further down
            masc, fem, neuter = adj, adj, adj

    elif adj[-3:] == 'είς':
        # passive aorist participles
        if not adj[:-3] + 'έντα' in greek_corpus:
            raise NotLegalAdjectiveException
        masc = adj
        fem = adj[:-1] + 'σα'
        neuter = adj[:-3] + 'έν'

    elif adj[-2:] in ['ας', 'άς']:

        # pas, pasa pan and active aorist participles
        # pas pasa pan

        pl_nta = adj[:-1] + 'ντα'
        fem_sa = adj[:-1] + 'σα'

        if count_syllables(adj) == 1:
            pl_nta = put_accent(pl_nta, 'penultimate')
            fem_sa = put_accent(fem_sa, 'penultimate')
        if pl_nta in greek_corpus:
            masc = adj
            fem = fem_sa
            neuter = adj[:-1] + 'ν'

        elif adj in ['μέλας']:
            masc = adj
            fem = adj[:-2] + 'αινα'
            neuter = adj[:-1] + 'ν'

        elif adj == 'μέγας':
            masc = adj
            # was misspelled 'μαγάλη'
            fem = 'μεγάλη'
            neuter = 'μέγα'

        elif adj[-4:] == 'ονας':
            masc = adj
            fem = adj[:-4] + 'ων'
            neuter = adj[:-2]

        elif where_is_accent(adj) == 'ultimate':
            masc = adj
            fem = adj[:-2] + 'ού'
            neuter = adj[:-1] + 'δικο'
        else:
            raise NotLegalAdjectiveException

    elif adj in ['προβεβηκώς', 'κεχηνώς', 'τεθνεώς', 'αφεστώς', 'ἐνεστώς', 'ενεστώς']:
        # 'ενεστώς' is listed in both polytonic (as originally) and monotonic spelling
        masc = adj
        fem = adj[:-1] + 'σα'
        neuter = adj
        # rare but sometimes ancient perf participle

    elif adj in ['άρρην']:
        # so rare that it can be solved like that
        masc = adj
        fem = adj
        neuter = masc[:-2] + 'εν'

    elif adj in ['περίφροντις', 'φέρελπις', 'άφροντις', 'φιλόπατρις', 'μόνορχις', 'παλίμπαις', 'πολύφροντις',
                 'αρνησίπατρις', 'άπολις', 'άπατρις', 'αφιλόπατρις', 'ενήλιξ', 'πυρρόθριξ', 'δασύθριξ', 'ουλόθριξ',
                 'κεντρόφυξ', 'πυρρόθριξ', 'υπερήλιξ', 'βλαξ', 'ομήλιξ', 'υπερμέτρωψ', 'κεντρόφυξ', 'μεσήλιξ']:
        masc, fem = adj, adj
        neuter = '-'

    elif adj in ['εύχαρις', 'επίχαρις', 'άχαρις']:
        masc, fem = adj, adj
        neuter = adj[:-1]

    elif adj in ['ίλεως']:
        masc, fem = adj, adj
        neuter = adj[:-1] + 'ν'

    else:
        masc, fem, neuter = adj, adj, adj

    if inflection == 'aklito':
        masc, fem, neuter = adj, adj, adj

    adj_forms = [masc, fem, neuter]

    adj_temp['adj'] = '/'.join(adj_forms)

    # comparatives (parathetika) and superlatives (uperthetika)

    stem = neuter
    if stem[-1] == 'ς':
        # final sigma becomes medial sigma once an ending is attached
        stem = stem[:-1] + 'σ'
    parathetika = None
    alt_parathetiko = None
    uperthetiko = '-'
    alt_uperthetiko = None

    parathetiko = put_accent_on_the_antepenultimate(stem + 'τερος')

    if parathetiko not in greek_corpus:
        parathetiko = None
    else:
        uperthetiko = put_accent_on_the_antepenultimate(parathetiko[:-5] + 'τατος')

        if uperthetiko not in greek_corpus:
            uperthetiko = '-'

    if neuter[-1] in ['ο', 'ό']:
        alt_parathetiko = remove_all_diacritics(neuter[:-1]) + 'ύτερος'
        if alt_parathetiko not in greek_corpus:
            alt_parathetiko = None
        else:
            alt_uperthetiko = put_accent_on_the_antepenultimate(alt_parathetiko[:-5] + 'τατος')
            if alt_uperthetiko not in greek_corpus:
                alt_uperthetiko = '-'

    if parathetiko and alt_parathetiko:
        parathetika = parathetiko + ',' + alt_parathetiko + '/' + uperthetiko + ',' + alt_uperthetiko

    elif parathetiko:
        parathetika = parathetiko + '/' + uperthetiko

    elif alt_parathetiko and alt_uperthetiko:
        parathetika = alt_parathetiko + '/' + alt_uperthetiko

    if neuter in irregular_comparatives:
        parathetiko = irregular_comparatives[neuter].split('/')[0]
        uperthetiko = irregular_comparatives[neuter].split('/')[1]
        alt_parathetiko, alt_uperthetiko = None, None
        parathetika = irregular_comparatives[neuter]

    if parathetika:
        adj_temp['comparative'] = parathetika

    # adverbs (epirrimata)

    alt_adv = None

    if neuter[-1] in ['ο', 'ό']:
        accent = where_is_accent(neuter)
        if accent != 'ultimate':
            adverb = neuter[:-1] + 'α'
            alt_adv = put_accent_on_the_penultimate(neuter[:-1] + 'ως', true_syllabification=False)
        else:
            adverb = neuter[:-1] + 'ά'
            alt_adv = neuter[:-1] + 'ώς'

    elif masc[-2:] in ['ής', 'ης'] and neuter[-2:] in ['ές', 'ες']:

        adverb = remove_all_diacritics(neuter[:-2]) + 'ώς'
        if adverb not in greek_corpus and neuter[:-2] + 'ως' in greek_corpus:
            adverb = neuter[:-2] + 'ως'
        alt_adv = neuter[:-2] + 'ά'

    elif neuter[-1] in ['υ', 'ύ'] and masc[-1] == 'ς':
        # it should have the ancient form on ews
        adverb = put_accent_on_the_penultimate(neuter[:-1] + 'εως')
        if adverb not in greek_corpus:
            # otherwise the feminine form is offered as the adverb candidate
            adverb = adj_forms[1]
    elif neuter[-1] == 'ί':
        # colors
        adverb = put_accent_on_the_ultimate(adj_forms[2] + 'α')

    elif (masc[-2:] in ['ας', 'άς', 'ων', 'ών'] or masc[-3:] in ['εις', 'είς']) and fem[-2:] == 'σα' and neuter[
        -1] == 'ν':
        # ancient adverbs
        adverb = put_accent_on_the_penultimate(neuter + 'τως')
    else:
        # for aklita
        adverb = neuter

    if neuter in ['λίγο', 'πολύ', 'ήσσον', 'κάλλιον']:
        adverb = neuter

    # special cases
    if neuter in ['μέγα', 'μεγάλο']:
        # special case
        adverb = 'μέγα'
        alt_adv = 'μεγάλως'

    elif (masc[-4:] == 'ονας' or masc[-2:] == 'ων') and fem[-2:] == 'ων':
        adverb = None

    elif masc in ['άρρην', 'μέλας']:
        adverb = None

    # only adverbs attested in the corpus are reported
    epirrimata = [e for e in [adverb, alt_adv] if e and e in greek_corpus]

    epirrimata = ','.join(epirrimata)
    if epirrimata:
        adj_temp['adverb'] = epirrimata

    # comparative epirrimata
    adv_parathetika = None

    adverb_parathetiko = alt_adverb_parathetiko = adverb_uperthetiko = alt_adverb_uperthetiko = ''

    if parathetiko:
        adverb_parathetiko = parathetiko[:-2] + 'α'
        if uperthetiko != '-':
            adverb_uperthetiko = ','.join([yp[:-2] + 'α' for yp in uperthetiko.split(',')])
        else:
            adverb_uperthetiko = '-'

    if alt_parathetiko:
        alt_adverb_parathetiko = alt_parathetiko[:-2] + 'α'
        # '-' marks a missing superlative and must stay '-' (slicing it used to yield a bare 'α')
        if alt_uperthetiko and alt_uperthetiko != '-':
            alt_adverb_uperthetiko = alt_uperthetiko[:-2] + 'α'
        else:
            alt_adverb_uperthetiko = '-'

    if parathetiko and alt_parathetiko:
        adv_parathetika = adverb_parathetiko + ',' + alt_adverb_parathetiko + '/' + adverb_uperthetiko + ',' + alt_adverb_uperthetiko

    elif parathetiko:
        adv_parathetika = adverb_parathetiko + '/' + adverb_uperthetiko
    elif alt_parathetiko:
        adv_parathetika = alt_adverb_parathetiko + '/' + alt_adverb_uperthetiko

    if neuter in irregular_comparative_adverbs:
        adv_parathetika = irregular_comparative_adverbs[neuter]

    if adv_parathetika:
        adj_temp['adverb_comparative'] = adv_parathetika

    return adj_temp
コード例 #5
0
def create_basic_paratatikos_forms(pres_form, root, pres_conjugation, deponens=False, not_deponens=True, modal_act=False, modal_med=False):
    """
    Create the basic paratatikos (imperfect) forms of a verb.

    The flags are tested in the order ``not_deponens``, ``deponens``, ``modal_act``, ``modal_med``; the first truthy
    one decides how candidates are built. Candidates are kept only when they occur in ``greek_corpus``; the only
    exception are the hard-coded forms for roots ending in 'ποι'.

    :param pres_form: present form; only read for modal verbs, where its ending selects the pattern
    :param root: present root the endings are attached to (not read in the ``modal_act`` branch)
    :param pres_conjugation: conjugation label such as 'con1_act', 'con2a_pass' or 'eimai'
    :param deponens: build passive forms only
    :param not_deponens: build active and passive forms (default)
    :param modal_act: build active 3rd person forms from ``pres_form``
    :param modal_med: build medio-passive 3rd person forms from ``pres_form`` and ``root``
    :return: string 'active/passive' where each side holds comma separated alternatives and may be empty;
        None when no flag is set
    """
    paratatikos_basic_forms = None
    if not_deponens:
        act_par, pass_par = [], []
        if pres_conjugation == 'con1_act':
            not_augmented_par = root + 'α'
            act_par = add_augment(not_augmented_par)

            # drop two-syllable candidates that start with a consonant
            act_par = [f for f in act_par if not (count_syllables(
                f) == 2 and f[0] not in vowels)]
            pass_par = [put_accent_on_the_penultimate(root + 'όμουν')]

        elif pres_conjugation == 'con2a_act':
            act_par = [root + 'ούσα', put_accent_on_the_antepenultimate(root + 'αγα')]
            pass_par = [root + 'ιόμουν', root + 'άμην']

        elif pres_conjugation in ['con2b_act', 'con2d_act']:
            act_par = [root + 'ούσα']
            pass_par = [root + 'ούμουν']
            if pres_conjugation == 'con2b_act' and root[-1] == 'ι':
                pass_par.append(root + 'όμουν')

        elif pres_conjugation == 'con2c_act':
            not_augmented_par = root + 'γα'
            act_par = add_augment(not_augmented_par)
            pass_par = [put_accent_on_the_penultimate(root + 'γόμουν')]

        elif pres_conjugation == 'eimai':
            act_par = ['ήμουν']

        act_par_all = [f for f in act_par if f in greek_corpus]
        if not act_par_all:
            # no candidate is attested as given: accept those whose form in -ε is in the corpus
            # and report them with the ending -α
            act_par_all_3rd = [f for f in act_par if f[:-1] + 'ε' in greek_corpus]
            if act_par_all_3rd:
                act_par_all = [f[:-1] + 'α' for f in act_par_all_3rd]

        pass_par = [f for f in pass_par if f in greek_corpus]
        act_par = ','.join(act_par_all)
        pass_par = ','.join(pass_par)

        paratatikos = '/'.join([act_par, pass_par])
        if root[-3:] == 'ποι':
            # roots in -ποι get fixed forms, without consulting the corpus
            paratatikos = root + 'ούσα/' + root + 'ούμουν' + ',' + root + 'όμουν'

        paratatikos_basic_forms = paratatikos

    elif deponens:
        pass_par = []
        root = remove_all_diacritics(root)
        if pres_conjugation == 'con1_pass':
            pass_par = [root + 'όμουν']
        elif pres_conjugation == 'con2a_pass':
            pass_par = [root + 'ιόμουν', root + 'ούμουν', root + 'όμουν']
        elif pres_conjugation == 'con2b_pass':
            pass_par = [root + 'ούμουν', root + 'ιόμουν']
        elif pres_conjugation in ['con2c_pass', 'con2ab_pass']:
            pass_par = [root + 'όμουν']
        elif pres_conjugation == 'con2d_pass':
            pass_par = [put_accent_on_the_penultimate(root + 'μην'), root[:-1] + 'όμουν', root + 'όμουν']
            pass_par.extend(add_augment(pass_par[0]))
        elif pres_conjugation == 'con2e_pass':
            pass_par = [root + 'άμην', root + 'όμουν']
        pass_par = [f for f in pass_par if f in greek_corpus]
        # de-duplicate while keeping the candidate order, so the result is the same on every run
        pass_par = list(dict.fromkeys(pass_par))
        pass_par = ','.join(pass_par)
        if root[-3:] == 'ποι':
            pass_par = root + 'ούμουν,' + root + 'όμουν'
        paratatikos_basic_forms = '/' + pass_par

    elif modal_act:
        parat_act_forms = []
        if pres_form[-3:] == 'άει':
            parat_act_forms = add_augment(pres_form[:-3] + 'ούσε')
            parat_act_forms.extend(add_augment(pres_form[:-3] + 'αγε'))
        elif pres_form[-1:] == 'ά':
            # the original compared a three-character slice with 'ά', which no real form could satisfy;
            # the second candidate mirrors the 'άει' branch, wrong guesses are dropped by the corpus filter
            parat_act_forms = add_augment(pres_form[:-1] + 'ούσε')
            parat_act_forms.extend(add_augment(pres_form[:-1] + 'αγε'))
        elif pres_form[-2:] == 'ει':
            parat_act_forms = add_augment(pres_form[:-2] + 'ε')
        elif pres_form[-2:] == 'εί':
            parat_act_forms = add_augment(pres_form[:-2] + 'ούσε')

        parat_act_forms = [f for f in parat_act_forms if f in greek_corpus]
        parat_act_forms = ','.join(parat_act_forms)

        paratatikos_basic_forms = parat_act_forms + '/'

    elif modal_med:
        parat_med_forms = []
        if pres_form[-5:] == 'ιέται':
            parat_med_forms = [root + 'ιόταν']

        elif pres_form[-5:] == 'είται':
            parat_med_forms = add_augment(root + 'είτο')
            parat_med_forms.extend([root + 'ούνταν'])

        elif pres_form[-4:] == 'άται':
            parat_med_forms = [root + 'άτο', root + 'όταν', root + 'ιόταν']

        elif pres_form[-4:] == 'εται':
            parat_med_forms = [put_accent_on_the_penultimate(root + 'όταν'), root + 'ετο']
        elif pres_form[-5:] == 'ειται':
            parat_med_forms = [root + 'ειτο']
            parat_med_forms.extend(add_augment(parat_med_forms[0]))
            parat_med_forms = [put_accent_on_the_antepenultimate(v) for v in parat_med_forms]

        parat_med_forms = [f for f in parat_med_forms if f in greek_corpus]
        parat_med_forms = ','.join(parat_med_forms)
        paratatikos_basic_forms = '/' + parat_med_forms

    return paratatikos_basic_forms
コード例 #6
0
def create_basic_aorist_forms(pres_form, act_root, passive_root, deponens=False, not_deponens=True, intransitive_active=False, modal_act=False, modal_med=False):
    """
    Create the basic aorist forms of a verb.

    The flags are tested in the order ``not_deponens``, ``deponens``, then ``modal_act``/``modal_med``. Candidate
    forms are built from the given roots (plus the irregular aorist tables for non-deponent verbs) and checked
    against ``greek_corpus``.

    :param pres_form: present form, used to match irregular verbs by their ending
    :param act_root: active aorist root; several alternatives may be given separated by ','
    :param passive_root: passive aorist root; several alternatives may be given separated by ','
    :param deponens: build forms for a deponent verb
    :param not_deponens: build active and passive forms (default)
    :param intransitive_active: not used by this function
    :param modal_act: build 3rd person forms and report them on the active side
    :param modal_med: build 3rd person forms and report them on the passive side
    :return: string 'active/passive' where each side holds comma separated alternatives and may be empty;
        None when no flag is set, or when the deponent/modal branch has no root to work with
    """
    aorist_basic_forms = None
    active_aor_forms, passive_aor_forms = [], []

    if not_deponens:

        for ir_verb in irregular_active_aorists:
            length_ir_verb = len(ir_verb)
            if len(pres_form) >= length_ir_verb and pres_form[-length_ir_verb:] == ir_verb:
                active_aor_forms.extend(add_augment(pres_form[:-length_ir_verb] + irregular_active_aorists[ir_verb]))
                if irregular_active_aorists[ir_verb][-4:] == 'βηκα':
                    # add archaic athematic aorist for compounds with bainw
                    active_aor_forms.extend(add_augment(pres_form[:-length_ir_verb] + irregular_active_aorists[ir_verb][:-2]))
        for ir_verb in irregular_passive_aorists:
            length_ir_verb = len(ir_verb)
            if len(pres_form) >= length_ir_verb and pres_form[-length_ir_verb:] == ir_verb:
                passive_aor_forms.extend(add_augment(pres_form[:-length_ir_verb] + irregular_passive_aorists[ir_verb]))

        if act_root:

            if ',' in act_root:
                for stem in act_root.split(','):
                    active_aor_forms.extend(add_augment(stem + 'α'))
            else:
                active_aor_forms.extend(add_augment(act_root + 'α'))

            if pres_form[-3:] == 'έχω':
                active_aor_forms.extend([pres_form[:-3] + 'είχα'])
                archaic_aor_form = add_augment(pres_form[:-3] + 'σχον')
                active_aor_forms.extend(archaic_aor_form)

            # filter_out
            active_aor_forms = [f for f in active_aor_forms if f in greek_corpus]

            # there are at least two instances where this algorithm can be confused by irregular imperative forms
            irregular_imperative_similar_to_aorist = ('ανέβα', 'κατέβα', 'τρέχα', 'φεύγα')

            # de-duplicate in candidate order (a set would make the order differ between runs)
            active_aor_forms = [f for f in dict.fromkeys(active_aor_forms)
                                if f not in irregular_imperative_similar_to_aorist]

            # special case for poiw
            if 'ποιήσ' in act_root:
                active_aor_forms.append(put_accent_on_the_antepenultimate(act_root + 'α', true_syllabification=False))

        if passive_root or passive_aor_forms:
            if passive_root and ',' in passive_root:

                for stem in passive_root.split(','):
                    pass_aor_form = stem + 'ηκα'

                    passive_aor_forms.append(put_accent_on_the_antepenultimate(pass_aor_form))
                    # archaic passive on purpose 3rd person, because it's more popular and so more probable that exists in corpus
                    archaic_passive_aor = stem + 'η'
                    archaic_passive_aor = add_augment(archaic_passive_aor)
                    passive_aor_forms.extend(archaic_passive_aor)

            elif passive_root:
                pass_aor_form = passive_root + 'ηκα'
                pass_aor_form = put_accent_on_the_antepenultimate(pass_aor_form)
                passive_aor_forms.append(pass_aor_form)
                # archaic passive on purpose 3rd person, because it's more popular and so more probable that exists in corpus
                archaic_passive_aor = passive_root + 'η'
                archaic_passive_aor = add_augment(archaic_passive_aor)
                passive_aor_forms.extend(archaic_passive_aor)

                if 'ποιηθ' in passive_root:
                    passive_aor_forms.append(
                        put_accent_on_the_antepenultimate(passive_root + 'ηκα', true_syllabification=False))
            # filter out

            passive_aor_forms = [f for f in passive_aor_forms if f in greek_corpus]

        # remove duplicates, keeping the order in which the forms were generated
        active_aor_forms = ','.join(dict.fromkeys(active_aor_forms))
        passive_aor_forms = ','.join(dict.fromkeys(passive_aor_forms))

        aorist_basic_forms = '/'.join([active_aor_forms, passive_aor_forms])

    elif deponens:
        if pres_form[-7:] in deponens_with_active_perf_forms:
            passive_root = act_root
        if passive_root:

            if ',' in passive_root:
                passive_aor_forms = []
                for stem in passive_root.split(','):
                    pass_aor_form = stem + 'ηκα'
                    # append, not extend: extending with a string would add its characters one by one
                    passive_aor_forms.append(put_accent_on_the_antepenultimate(pass_aor_form))

                    # archaic passive on purpose 3rd person, because it's more popular and so more probable that exists in
                    # corpus
                    archaic_passive_aor = stem + 'η'
                    archaic_passive_aor = add_augment(archaic_passive_aor)
                    passive_aor_forms.extend(archaic_passive_aor)

            else:
                passive_aor_forms = passive_root + 'ηκα'

                passive_aor_forms = [put_accent_on_the_antepenultimate(passive_aor_forms)]
                # archaic passive
                archaic_passive_aor = passive_root + 'ην'
                archaic_passive_aor = add_augment(archaic_passive_aor)
                passive_aor_forms.extend(archaic_passive_aor)
            # filter out

            passive_aor_forms = [f for f in passive_aor_forms if f in greek_corpus]
            passive_aor_forms = ','.join(passive_aor_forms)

            # ginomai, erxomai, kathomai

            if pres_form[-7:] in deponens_with_active_perf_forms:
                # these verbs take active-looking aorists, so rebuild the candidates with the ending -α
                if ',' in passive_root:
                    passive_aor_forms = []
                    for stem in passive_root.split(','):
                        pass_aor_form = stem + 'α'
                        passive_aor_forms.extend(add_augment(pass_aor_form))
                else:
                    passive_aor_forms = passive_root + 'α'
                    passive_aor_forms = add_augment(passive_aor_forms)
                passive_aor_forms = [form for form in passive_aor_forms if form in greek_corpus]
                passive_aor_forms = ','.join(passive_aor_forms)
            if 'ποιηθ' in passive_root:
                # fixed form, not checked against the corpus
                passive_aor_forms = (passive_root + 'ηκα')
            aorist_basic_forms = '/' + passive_aor_forms
            if pres_form[-7:] in deponens_with_active_perf_forms:
                # reported on the active side of the separator
                aorist_basic_forms = passive_aor_forms + '/'
    elif modal_act or modal_med:
        mod_root = None
        if act_root:
            mod_root = act_root
        elif passive_root:
            mod_root = passive_root
        if mod_root:
            aor_forms = add_augment(mod_root + 'ε')
            if passive_root:
                aor_forms.extend(add_augment(mod_root + 'ηκε'))
            # mainly for symbainei
            anc_forms = add_augment(mod_root + 'η')
            anc_forms = [a for a in anc_forms if where_is_accent(a) == 'penultimate']
            aor_forms.extend(anc_forms)

            aor_forms = [f for f in aor_forms if f in greek_corpus]

            aorist_basic_forms = ','.join(aor_forms)
            if modal_act:
                aorist_basic_forms = aorist_basic_forms + '/'
            elif modal_med:
                aorist_basic_forms = '/' + aorist_basic_forms

    return aorist_basic_forms
コード例 #7
0
def create_all_basic_noun_forms(noun, inflection=None, gender=None, proper_name=False):
    """
    Guess the basic forms and the gender of a noun from its nominative singular.

    The declension type is chosen from the ending of ``noun``; candidate forms are
    then accepted or rejected mostly by testing their membership in ``greek_corpus``.
    The noun is lower-cased for the whole analysis; if it started with an upper-case
    letter the result is passed through ``capitalize_basic_forms`` before returning.

    :param noun: must be nom sg (a plurale tantum is also recognised for some endings)
    :param inflection: None (the inflection is found automatically) or "aklito"
        (indeclinable)
    :param gender: optional hint for nouns whose gender cannot be guessed from the
        ending. The code handles 'masc', 'fem', 'neut' and the number-restricted
        variants 'masc_sg', 'fem_sg', 'neut_sg', 'masc_pl', 'fem_pl', 'neut_pl'
    :param proper_name: proper names behave differently from normal nouns (e.g. no
        guessed plural), so if it is known, it should be flagged
    :return: dictionary with keys: nom_sg, gen_sg, nom_pl and gender. Alternative
        forms are joined with a comma
    """

    noun_temp = {'nom_sg': noun, 'gen_sg': '', 'nom_pl': '', 'gender': ''}
    number_of_syllables = count_syllables(noun, true_syllabification=False)
    accent = where_is_accent(noun, true_syllabification=False)
    ultimate_accent = accent == ULTIMATE

    # remember the capitalisation, everything below works on the lower-cased noun
    capital = noun[0].isupper()
    noun = noun.lower()

    prefixes = ['νανο', 'μικρο', 'σκατο', 'παλιο']

    # on 'os'

    if noun[-2:] in ['ός', 'ος']:
        stem = noun[:-2]
        plural_form = put_accent(stem + 'οι', accent, true_syllabification=False)
        gen_form = put_accent(stem + 'ου', accent, true_syllabification=False)
        if remove_diaer(plural_form) == put_accent(stem + 'οι', accent):
            plural_form = remove_diaer(plural_form)

        if remove_diaer(gen_form) == put_accent(stem + 'ου', accent):
            gen_form = remove_diaer(gen_form)

        gens_sg = []

        noun_temp['gender'] = 'masc'

        # the problem is that many long words on -os are part of some kind of jargon and do not have any other form
        # declined in the corpus; I will assume then that words above 4 syllables do exist, but only in singular, the
        # same should be the case for neuter long words on -o
        # also some proper names in greek_corpus are, as is proper, capitalized
        if gen_form in greek_corpus or gen_form.capitalize() in greek_corpus or gender == 'masc' \
                or number_of_syllables > 4:
            gens_sg.append(gen_form)

        if accent == ANTEPENULTIMATE:
            gen_form_alt = put_accent(gen_form, PENULTIMATE, true_syllabification=False)
            if gen_form_alt in greek_corpus:
                gens_sg.append(gen_form_alt)

        noun_temp['gen_sg'] = ','.join(gens_sg)

        if plural_form in greek_corpus or plural_form.capitalize() in greek_corpus or number_of_syllables > 4 \
                or gender == 'masc':
            noun_temp['nom_pl'] = plural_form
            if not gens_sg:
                noun_temp['gen_sg'] = gen_form

        if noun in feminine_os:
            noun_temp['gender'] = 'fem'

        if noun in feminine_or_masc or (noun[-5:] == 'λόγος' and number_of_syllables > 3):
            # especially all kinds of professionals
            noun_temp['gender'] = 'fem,masc'

        if (not noun_temp['nom_pl'] and not gens_sg) or gender == 'neut':
            # maybe it's neuter like lathos
            plural_form = stem + 'η'
            gen_form = stem + 'ους'

            if accent == ULTIMATE:
                plural_form = stem + 'ή'
                gen_form = stem + 'ούς'
            elif accent == ANTEPENULTIMATE:
                plural_form = put_accent_on_the_penultimate(plural_form)
                gen_form = put_accent_on_the_penultimate(gen_form)

            if plural_form in greek_corpus or gen_form in greek_corpus or gender == 'neut':
                noun_temp['nom_pl'] = plural_form
                noun_temp['gen_sg'] = gen_form
                noun_temp['gender'] = 'neut'

            # 'γεγονός' and other perfect participles
            plural_form = noun[:-1] + 'τα'
            gen_form = noun[:-1] + 'τος'
            if plural_form in greek_corpus or gen_form in greek_corpus:
                noun_temp['nom_pl'] = plural_form
                noun_temp['gen_sg'] = gen_form
                noun_temp['gender'] = 'neut'

        # in all other instances probably they are correct masculine words, but don't occur in the corpus, still for
        # proper name don't add plural if it doesn't exist in the corpus
        if not (noun_temp['nom_pl'] or noun_temp['gen_sg']):

            stem = noun[:-2]
            plural_form = stem + 'οι'
            gen_form = put_accent(stem + 'ου', accent, true_syllabification=False)
            if accent == ULTIMATE:
                plural_form = stem + 'οί'
                gen_form = stem + 'ού'
            if not proper_name:
                noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
            noun_temp['gender'] = 'masc'

    elif noun[-1] == 'ς' and \
            ((noun[:-1] + 'δες' in greek_corpus) or (put_accent_on_the_antepenultimate(noun[:-1] + 'δες') in
                                                     greek_corpus)) and noun[-2:] != 'ις':
        # imparisyllaba on des, archaic and modern

        noun_temp['gender'] = 'masc'
        plural_form = noun[:-1] + 'δες'
        # sometimes the accent has to be moved, and sometimes there are alternatives
        plural_form_a = put_accent_on_the_antepenultimate(plural_form)
        plural_form_b = put_accent_on_the_penultimate(plural_form)

        # keep every attested variant once, in a stable order (a set would make the
        # order of the joined alternatives depend on string hashing)
        plurals = []
        for candidate in (plural_form, plural_form_a, plural_form_b):
            if candidate in greek_corpus and candidate not in plurals:
                plurals.append(candidate)
        noun_temp['nom_pl'] = ','.join(plurals)

        gen_form = noun[:-1]
        gen_form_a = put_accent_on_the_penultimate(gen_form)
        gen_form_arch = noun[:-1] + 'δος'
        if count_syllables(noun) == 1:
            gen_form_arch = put_accent_on_the_ultimate(gen_form_arch)

        gens = []
        for candidate in (gen_form, gen_form_a, gen_form_arch):
            if candidate in greek_corpus and candidate not in gens:
                gens.append(candidate)
        noun_temp['gen_sg'] = ','.join(gens)

    elif noun[-2:] in ['ές', 'ες']:

        # they can be either pluralia tantum or masc on es that do not have plur in the corpus or neuter on es or aklito
        if noun[:-1] in greek_corpus or noun[:-1].capitalize() in greek_corpus:
            # this means it's a gen. of a masc form
            noun_temp['gender'] = 'masc'
            noun_temp['gen_sg'] = noun[:-1]

            nom_pl = noun[:-1] + 'δες'
            if nom_pl not in greek_corpus:

                nom_pl_alt = put_accent(noun[:-2] + 'ηδες', ANTEPENULTIMATE)

                if nom_pl_alt in greek_corpus:
                    nom_pl = nom_pl_alt
            noun_temp['nom_pl'] = nom_pl

        elif put_accent(noun[:-2] + 'ους', accent) in greek_corpus:
            noun_temp['gen_sg'] = put_accent(noun[:-2] + 'ους', accent)
            noun_temp['nom_pl'] = put_accent(noun[:-2] + 'η', accent)
            noun_temp['gender'] = 'neut'

        elif (noun[:-2] + 'ων') in greek_corpus or (remove_all_diacritics(noun[:-2]) + 'ών') in greek_corpus or \
                (noun[:-2] + 'ων').capitalize() in greek_corpus or \
                (remove_all_diacritics(noun[:-2]) + 'ών').capitalize() in greek_corpus or \
                noun in ['προάλλες', 'πρόποδες']:
            # a genitive plural exists: treat the word as a plurale tantum

            noun_temp['gender'] = 'fem'
            if noun in ['πρόποδες', 'χοιράδες']:
                noun_temp['gender'] = 'masc'
            noun_temp['gen_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['nom_sg'] = ''

        else:
            # should be neuter aklita
            noun_temp['gender'] = 'neut'
            noun_temp['gen_sg'] = noun
            noun_temp['nom_pl'] = noun
            noun_temp['nom_sg'] = noun

    elif noun[-2:] in ['άς', 'ής', 'ας', 'ης']:
        noun_temp['gender'] = 'masc'
        # es
        plural_form_a = noun[:-2] + 'ες'
        gen_form_a = noun[:-1]
        if ultimate_accent:
            plural_form_a = noun[:-2] + 'ές'
        # eas - eis,
        plural_form_b = noun[:-3] + 'είς'
        gen_form_b = noun[:-1]
        # hs, eis
        plural_form_ba = noun[:-2] + 'είς'
        gen_form_ba = noun[:-2] + 'ούς'
        # ancient forms
        plural_form_c = noun[:-1] + 'τες'
        plural_form_c_neut = noun[:-1] + 'τα'
        gen_form_c = noun[:-1] + 'τος'
        if not ultimate_accent:
            plural_form_c = put_accent_on_the_antepenultimate(plural_form_c, true_syllabification=False)
            plural_form_c_neut = put_accent_on_the_antepenultimate(plural_form_c_neut, true_syllabification=False)
            gen_form_c = put_accent_on_the_antepenultimate(gen_form_c, true_syllabification=False)

        if plural_form_c in greek_corpus and gen_form_c in greek_corpus:
            nom_pl = plural_form_c
            gen_sg = gen_form_c
            # but it is possible that there is also a more dimotiki form of gen_sg
            if gen_form_a in greek_corpus:
                gen_sg = gen_form_c + ',' + gen_form_a

        elif (plural_form_b in greek_corpus and gen_form_b in greek_corpus) and noun[-3:] not in ['ίας']:
            # the last condition is to exclude the possibility that it is a false positive because of some same
            # sounding fut aorist forms
            nom_pl = plural_form_b
            gen_sg = gen_form_b

        elif plural_form_ba in greek_corpus and gen_form_ba in greek_corpus:
            nom_pl = plural_form_ba
            gen_sg = gen_form_ba

        elif plural_form_a in greek_corpus:

            nom_pl = plural_form_a

            gen_sg = gen_form_a

        elif plural_form_c_neut in greek_corpus and gen_form_c in greek_corpus:
            nom_pl = plural_form_c_neut
            gen_sg = gen_form_c
            noun_temp['gender'] = 'neut'

        else:
            nom_pl = None
            gen_sg = None

        if nom_pl:
            noun_temp['nom_pl'] = nom_pl
            noun_temp['gen_sg'] = gen_sg

        elif noun[-2:] == 'άς':
            # if corpus doesn't help, more probable is that ending in as is imparisyllaba
            noun_temp['nom_pl'] = noun[:-1] + 'δες'
            noun_temp['gen_sg'] = gen_form_a
            # there are many professions that are rarely in plural, but which do have gen, and almost all of them
            # create gen by subtracting s
        else:

            noun_temp['nom_pl'] = plural_form_a
            if noun[-3:] == 'έας':
                noun_temp['nom_pl'] = plural_form_b
            noun_temp['gen_sg'] = gen_form_a

        # lastly check maybe there are professions which can be feminine
        if noun in feminine_or_masc:
            noun_temp['gender'] = 'masc,fem'
        # and again a better test for all -eas: if there is gen sg on ews, this has certainly a feminine form; this gen
        # form cannot be added as an alternative, as it occurs only for feminines and has to be added in create_all
        # function
        fem_gen = noun[:-3] + 'έως'

        if noun[-3:] == 'έας' and fem_gen in greek_corpus:
            noun_temp['gender'] = 'masc,fem'

    elif noun[-3:] in ['εύς', 'ευς']:

        plural_form = noun[:-3] + 'είς'
        gen_form = noun[:-3] + 'έως'
        noun_temp['gender'] = 'masc'
        if plural_form in greek_corpus and gen_form in greek_corpus:
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
        # the noun has already been lower-cased above, so the comparison has to use
        # the lower-case spelling for this special case to ever apply
        if noun == 'ζευς':

            noun_temp['gen_sg'] = 'Διός,Δίος'
            noun_temp['nom_pl'] = ''

        if noun in feminine_or_masc:
            noun_temp['gender'] = 'fem,masc'

    elif noun[-2:] in ['ώς', 'ως']:
        noun_temp['gender'] = 'neut'
        plural_form = noun[:-1] + 'τα'
        gen_form = noun[:-1] + 'τος'
        thema_ot = put_accent(noun[:-2] + 'οτ', accent)
        if count_syllables(noun) == 1:
            gen_form = put_accent_on_the_ultimate(gen_form)
            plural_form = put_accent_on_the_antepenultimate(plural_form)
        if plural_form in greek_corpus or gen_form in greek_corpus:
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
        # there is a possibility that the thema is on 'οτ'
        elif thema_ot + 'ος' in greek_corpus:
            noun_temp['gender'] = 'neut'
            gen_form = thema_ot + 'ος'
            plural_form = thema_ot + 'α'
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
        # there are also rare feminine on ως with gen on ους eg 'αιδώς'
        elif put_accent(noun[:-2] + 'ους', accent) in greek_corpus:
            gen_form = put_accent(noun[:-2] + 'ους', accent)
            noun_temp['gen_sg'] = gen_form
            noun_temp['gender'] = 'fem'
        elif noun == 'άλως':
            # it's kind of an exception
            gen_form = 'άλω'
            noun_temp['gen_sg'] = gen_form
            noun_temp['gender'] = 'fem'

    elif noun[-2:] in ['ις', 'ΐς', 'ίς']:
        noun_temp['gender'] = 'fem'
        plural_form = put_accent_on_the_penultimate(noun[:-2] + 'εις', true_syllabification=False)

        gen_form = noun[:-2] + 'εως'
        if noun == 'μις':
            # special case
            plural_form = noun
            gen_form = noun

        if gen_form in greek_corpus or plural_form in greek_corpus:
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form

        else:
            # maybe gen on idos
            gen_form = noun[:-1] + 'δος'
            plural_form = noun[:-1] + 'δες'
            if gen_form in greek_corpus or plural_form in greek_corpus or gender == 'fem':
                noun_temp['nom_pl'] = plural_form
                noun_temp['gen_sg'] = gen_form

    elif noun[-3:] in ['ους', 'ούς']:
        if 'πλους' in noun or ('νους' in noun and noun != 'μπόνους'):
            noun_temp['gender'] = 'masc'
            noun_temp['gen_sg'] = noun[:-1]
        elif noun == 'ους':
            # the ear ('ους') has to be handled on its own
            noun_temp['gender'] = 'neut'
            noun_temp['gen_sg'] = 'ωτός'
            noun_temp['nom_pl'] = 'ώτα'
        else:
            # aklita
            noun_temp['gender'] = 'neut'
            noun_temp['gen_sg'] = noun
            noun_temp['nom_pl'] = noun

    elif noun[-2:] in ['υς', 'ύς']:
        noun_temp['gender'] = 'fem'
        gen_form = noun[:-1] + 'ος'
        thema = put_accent_on_the_ultimate(noun)
        if count_syllables(noun) == 1:
            gen_form = put_accent_on_the_ultimate(gen_form)
        plur_form = thema[:-1] + 'ες'
        if noun in ['βοτρύς', 'ιχθύς', 'πέλεκυς', 'μυς']:
            noun_temp['gender'] = 'masc'
        if noun == 'πέλεκυς':
            gen_form = 'πελέκεως'
            plur_form = 'πελέκεις'

        noun_temp['gen_sg'] = gen_form
        noun_temp['nom_pl'] = plur_form

    elif noun[-1] in ['α', 'η', 'ά', 'ή']:
        # feminina
        noun_temp['gender'] = 'fem'
        gen_a = noun + 'ς'
        noun_temp['gen_sg'] = gen_a
        plural_form_a = noun[:-1] + 'ες'
        if ultimate_accent:
            plural_form_a = noun[:-1] + 'ές'
        plural_form_b = put_accent_on_the_penultimate(noun[:-1] + 'εις', true_syllabification=False)
        plural_form_c = noun + 'δες'

        if plural_form_c in greek_corpus:
            noun_temp['nom_pl'] = plural_form_c

        elif plural_form_a in greek_corpus and plural_form_a not in ['γες']:
            # unfortunately for some very short words it can fail, ad hoc solution is to implement some kind of a list
            noun_temp['nom_pl'] = plural_form_a

        # special case for neuter on ma
        if noun[-2:] == 'μα' and (plural_form_a not in greek_corpus or
                                  plural_form_b not in greek_corpus or
                                  plural_form_c not in greek_corpus or
                                  gender == 'neut' or
                                  put_accent_on_the_antepenultimate(noun + 'τα', true_syllabification=False)
                                  in greek_corpus):
            plural_form = put_accent_on_the_antepenultimate(noun + 'τα', true_syllabification=False)
            gen_form = put_accent_on_the_antepenultimate(noun + 'τος', true_syllabification=False)
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
            noun_temp['gender'] = 'neut'
        elif (noun[-1] == 'α' and noun + 'τος' in greek_corpus and noun + 'τα' in greek_corpus) \
                or gender == 'neut':
            # gala, galatos

            noun_temp['nom_sg'] = noun
            noun_temp['nom_pl'] = put_accent_on_the_antepenultimate(noun + 'τα')
            noun_temp['gen_sg'] = put_accent_on_the_antepenultimate(noun + 'τος')
            noun_temp['gender'] = 'neut'
            if 'γάλα' in noun:
                noun_temp['nom_pl'] = noun + 'τα' + ',' + noun + 'κτα'
                noun_temp['gen_sg'] = noun + 'τος' + ',' + noun + 'κτος'
        if (noun[-1] in ['α', 'ά'] and gen_a not in greek_corpus and plural_form_a not in greek_corpus
                and put_accent(noun[:-1] + 'ων', accent) in greek_corpus) or noun in plur_tant_neut:

            # maybe pluralia tantum
            noun_temp['nom_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = ''
            noun_temp['gender'] = 'neut'

        if (noun[-2:] in ['ση', 'ξη', 'ψη'] or noun in feminine_h_eis) and \
                put_accent_on_the_ultimate(noun[:-1] + 'ων') not in greek_corpus:
            # it has to be an `if`, because the noun can be earlier falsely recognized as a correct form on es,
            # because of some aorists in sec person sg
            noun_temp['nom_pl'] = plural_form_b
            noun_temp['gen_sg'] = gen_a + ',' + put_accent_on_the_antepenultimate(noun[:-1] + 'εως',
                                                                                  true_syllabification=False)

    elif noun[-2:] == 'ού':
        noun_temp['gender'] = 'fem'
        noun_temp['gen_sg'] = noun + 'ς'
        plural_form = noun + 'δες'
        if plural_form in greek_corpus:
            noun_temp['nom_pl'] = plural_form

    elif noun[-1] in ['ό', 'ο']:
        if noun[-3:] == 'ιμο':
            plural_form = noun[:-1] + 'ατα'
            gen_form = noun[:-1] + 'ατος'
            plural_form = put_accent_on_the_antepenultimate(plural_form)
            gen_form = put_accent_on_the_penultimate(gen_form)
            if plural_form in greek_corpus or gen_form in greek_corpus:
                noun_temp['nom_pl'] = plural_form
                noun_temp['gen_sg'] = gen_form
                noun_temp['gender'] = 'neut'

                # NOTE: early return, the gender / aklito / capitalisation post-processing below is skipped
                return noun_temp

        noun_temp['gender'] = 'neut'
        plural_form = noun[:-1] + 'α'
        gen_form = noun[:-1] + 'ου'
        if ultimate_accent:
            plural_form = noun[:-1] + 'ά'
            gen_form = noun[:-1] + 'ού'
        if plural_form in greek_corpus or \
                plural_form.capitalize() in greek_corpus or \
                number_of_syllables > 4 or \
                (gender not in ['fem', 'masc'] and inflection != 'aklito'):

            noun_temp['nom_pl'] = plural_form

        gens = []
        if gen_form in greek_corpus or \
                gen_form.capitalize() in greek_corpus or \
                number_of_syllables > 4:
            gens.append(gen_form)

        if accent == ANTEPENULTIMATE:
            gen_a = put_accent(gen_form, PENULTIMATE, true_syllabification=False)
            if gen_a in greek_corpus:
                gens.append(gen_a)

        if gens:
            noun_temp['gen_sg'] = ','.join(gens)
        elif gender not in ['fem', 'masc'] and inflection != 'aklito':
            noun_temp['gen_sg'] = gen_form
        else:
            # in this case we assume the noun is an indeclinable neuter
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun
            noun_temp['gender'] = 'neut'

    elif noun[-1] in ['ι', 'ί', 'ΐ'] and noun[-2:] not in ['οι', 'οί']:
        noun_temp['gender'] = 'neut'
        plural_form = noun + 'α'
        gen_form = put_accent_on_the_ultimate(noun + 'ου')
        if ultimate_accent:
            plural_form = put_accent_on_the_ultimate(plural_form)

        if plural_form[-3] in vowels:
            # a vowel before the final ι: insert 'γ' before the ending

            plural_form = plural_form[:-2] + 'γι' + plural_form[-1]
            gen_form = gen_form[:-3] + 'γι' + gen_form[-2:]

        # in greek corpus there are lacking some upokoristika
        if plural_form in greek_corpus or noun[-3:] in ['άκι', 'ίκι', 'άρι', 'έκι', 'ήρι', 'ίδι', 'ύρι']:
            noun_temp['nom_pl'] = plural_form

            noun_temp['gen_sg'] = gen_form

        elif inflection != "aklito":
            # if corpus doesn't help, but we know that it's declinable
            noun_temp['nom_pl'] = plural_form
        if noun_temp['nom_pl'] == '' and noun_temp['gen_sg'] == '':
            # we conclude these are aklita, but I am sure there will be some uncovered words that do decline,
            # I have no idea though how to sort them out
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun

    elif noun[-2:] in ['οι', 'οί']:
        # pluralis tantum masc
        noun_temp['gender'] = 'masc'
        noun_temp['nom_pl'] = noun
        noun_temp['nom_sg'] = ''
        noun_temp['gen_sg'] = ''

    # ending n is a bit tricky, so we will work it out separately

    elif noun[-2:] in ['ον', 'όν', 'έν', 'εν', 'άν', 'αν']:
        # neuter nouns with a stem in -ντ; the ancient 2nd-declension neuters in -ον are also taken into account

        noun_temp['gender'] = 'neut'
        plural_form = noun + 'τα'

        gen_form = noun + 'τος'
        # ancient words in -ον
        plural_form_a = ''
        gen_form_a = ''

        if noun[-2:] in ['ον', 'όν']:
            plural_form_a = noun[:-2] + 'ά'
            gen_form_a = noun[:-2] + 'ού'

            if not ultimate_accent:
                plural_form_a = noun[:-2] + 'α'
                gen_form_a = put_accent_on_the_penultimate(gen_form_a,
                                                           true_syllabification=False)
        if not is_accented(noun):
            # monosyllables are accented on the ultima in the genitive
            plural_form = put_accent_on_the_penultimate(plural_form, true_syllabification=False)
            gen_form = put_accent_on_the_ultimate(gen_form)
            if noun == 'ον':
                gen_form = put_accent_on_the_penultimate(gen_form)

        if plural_form in greek_corpus and gen_form in greek_corpus:

            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form

        elif plural_form_a in greek_corpus and gen_form_a in greek_corpus:
            noun_temp['nom_pl'] = plural_form_a
            noun_temp['gen_sg'] = gen_form_a
        else:
            # it is assumed it's a borrowing from french

            if noun in ['ρεσεψιόν', 'σπορτσγούμαν']:
                # there are certainly more
                noun_temp['gender'] = 'fem'
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun

    elif noun[-2:] in ['ων', 'ών']:
        noun_temp['gender'] = 'masc'

        irregular_3 = {'κύων': 'κυν', 'είρων': 'είρων', 'ινδικτιών': 'ινδικτιών'}

        # four possible stems
        stem_a = noun[:-2] + 'όν'
        stem_b = noun[:-2] + 'όντ'
        stem_c = noun[:-2] + 'ούντ'
        stem_d = noun[:-2] + 'ώντ'

        plural_form_a = stem_a + 'ες'
        gen_form_a = stem_a + 'ος'
        plural_form_b = stem_b + 'ες'
        gen_form_b = stem_b + 'ος'
        plural_form_c = stem_c + 'ες'
        gen_form_c = stem_c + 'ος'
        plural_form_d = stem_d + 'ες'
        gen_form_d = stem_d + 'ος'

        ir_stem = irregular_3.get(noun)

        if ir_stem:

            ir_pl = ir_stem + 'ες'
            ir_gen = ir_stem + 'ος'
            if count_syllables(ir_stem) == 1 and ir_gen not in greek_corpus:
                ir_pl = put_accent_on_the_antepenultimate(ir_pl)
                ir_gen = put_accent_on_the_antepenultimate(ir_gen)
                if ir_gen not in greek_corpus:
                    ir_gen = put_accent_on_the_ultimate(ir_gen)

            if ir_pl in greek_corpus and ir_gen in greek_corpus:
                noun_temp['nom_pl'] = ir_pl
                noun_temp['gen_sg'] = ir_gen

                # NOTE: early return, the post-processing at the end of the function is skipped
                return noun_temp

        if not ultimate_accent:
            plural_form_a = put_accent_on_the_antepenultimate(plural_form_a, true_syllabification=False)
            gen_form_a = put_accent_on_the_antepenultimate(gen_form_a, true_syllabification=False)
            plural_form_b = put_accent_on_the_antepenultimate(plural_form_b, true_syllabification=False)
            gen_form_b = put_accent_on_the_antepenultimate(gen_form_b, true_syllabification=False)

        if plural_form_a in greek_corpus and gen_form_a in greek_corpus:
            noun_temp['nom_pl'] = plural_form_a
            noun_temp['gen_sg'] = gen_form_a
        elif plural_form_b in greek_corpus and gen_form_b in greek_corpus:
            noun_temp['nom_pl'] = plural_form_b
            noun_temp['gen_sg'] = gen_form_b
        elif plural_form_c in greek_corpus and gen_form_c in greek_corpus:
            noun_temp['nom_pl'] = plural_form_c
            noun_temp['gen_sg'] = gen_form_c
        elif plural_form_d in greek_corpus and gen_form_d in greek_corpus:
            noun_temp['nom_pl'] = plural_form_d
            noun_temp['gen_sg'] = gen_form_d

    elif noun[-1] in ['ξ', 'ψ', 'τ', 'ρ', 'β', 'ν', 'δ', 'ε', 'έ', 'ζ', 'κ', 'λ', 'μ'] and \
            noun not in ['σεξ', 'σερ', 'φαξ', 'μπορ', 'μπαρ', 'μποξ'] and inflection != 'aklito':
        # not very common but existing 3rd declension nouns

        stems = []

        if noun[-1] == 'ξ':

            stems.append(noun[:-1] + 'κ')
            stems.append(noun[:-1] + 'χ')
            stems.append(noun[:-1] + 'κτ')
        elif noun[-1] == 'ψ':

            stems.append(noun[:-1] + 'π')
            stems.append(noun[:-1] + 'φ')
            stems.append(noun[:-1] + 'πτ')
            stems.append(noun[:-1] + 'β')

        elif noun[-1] == 'ρ':

            stems.append(noun)
            stems.append(noun[:-1] + 'τ')
            if noun[-2:] == 'ωρ':
                stems.append(noun[:-2] + 'ορ')
                noun_temp['gender'] = 'masc'

                if 'μήτωρ' in noun:
                    noun_temp['gender'] = 'fem'
            elif noun[-2:] == 'ώρ':
                stems.append(noun[:-2] + 'όρ')
                noun_temp['gender'] = 'masc'
            else:
                noun_temp['gender'] = 'neut'

        for stem in stems:
            plural_form = stem + 'ες'
            modern_form = stem + 'ας'
            plural_form_n = stem + 'α'
            gen_form = stem + 'ος'
            if count_syllables(stem) == 1:
                plural_form = put_accent_on_the_antepenultimate(plural_form)
                plural_form_n = put_accent_on_the_antepenultimate(plural_form_n)
                gen_form = put_accent_on_the_antepenultimate(gen_form)
                if gen_form not in greek_corpus:
                    gen_form = put_accent_on_the_ultimate(gen_form)
            elif where_is_accent(stem) == ANTEPENULTIMATE:
                gen_form = put_accent_on_the_antepenultimate(gen_form)
                plural_form = put_accent_on_the_antepenultimate(plural_form)
                plural_form_n = put_accent_on_the_antepenultimate(plural_form_n)

            if (plural_form in greek_corpus or modern_form in greek_corpus) and noun not in ['πυρ']:
                noun_temp['nom_pl'] = plural_form
                if gen_form in greek_corpus or modern_form in greek_corpus:
                    noun_temp['gen_sg'] = gen_form
                if gender:
                    noun_temp['gender'] = gender
                # it's a bit crude way to correct gender but i cannot find a better one without a comprehensive list
                # NOTE: early return, the post-processing at the end of the function is skipped
                return noun_temp
            else:
                if plural_form_n in greek_corpus or noun in ['έαρ']:
                    noun_temp['gender'] = 'neut'
                    noun_temp['gen_sg'] = gen_form
                    if noun not in ['έαρ']:
                        noun_temp['nom_pl'] = plural_form_n
                    return noun_temp

        # else it is assumed it's either borrowing or some substantiated other things

        noun_temp['gender'] = 'neut'
        noun_temp['nom_pl'] = noun
        noun_temp['gen_sg'] = noun
        # NOTE(review): 'ρεσεψιόν' ends in 'όν' and is therefore already consumed by the '-ον/-όν' branch above
        if noun in ['σπεσιαλιτέ', 'ρεσεψιόν']:
            # there are probably more such cases
            noun_temp['gender'] = 'fem'
        # NOTE(review): 'σερ' is excluded by the condition of this branch, so this check cannot succeed here
        if noun in ['σερ']:
            # there should be added probably a lot of proper names, but I will deal with it by using
            # a flag proper_name_gender
            noun_temp['gender'] = 'masc'

    elif noun[-1] in ['ώ', 'ω']:

        if noun in ['ηχώ', 'πειθώ', 'φειδώ', 'βάβω']:
            # ancient feminina
            noun_temp['gender'] = 'fem'

            noun_temp['gen_sg'] = noun[:-1] + 'ούς'
            if noun in ['βάβω']:
                noun_temp['gen_sg'] = noun
        elif capital or proper_name:
            # feminine proper name
            noun_temp['gender'] = 'fem'
            noun_temp['gen_sg'] = noun + 'ς'

        else:
            noun_temp['gender'] = 'neut'
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun

    elif noun[-1] in ['υ', 'ύ']:
        # ancient 3 declension, oksy , asty
        noun_temp['gender'] = 'neut'
        if noun[-2:] in ['ου', 'ού']:
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun
        elif noun[-1] == 'υ':
            gen_1 = noun + 'ου'
            gen_1b = put_accent_on_the_penultimate(gen_1)
            plural = noun + 'α'

            if gen_1 in greek_corpus:
                noun_temp['gen_sg'] = gen_1
            elif gen_1b in greek_corpus:
                noun_temp['gen_sg'] = gen_1b
            if plural in greek_corpus:
                noun_temp['nom_pl'] = plural

            # hard-coded exceptions override whatever the corpus suggested
            if noun in ['άστυ', 'δόρυ']:
                noun_temp['nom_pl'] = noun[:-1] + 'η'
                noun_temp['gen_sg'] = noun[:-1] + 'εως'
            if noun in ['βράδυ']:
                noun_temp['nom_pl'] = noun[:-1] + 'ια'
                noun_temp['gen_sg'] = put_accent_on_the_ultimate(noun[:-1] + 'ιου')
            if noun in ['στάχυ', 'δίχτυ']:
                noun_temp['nom_pl'] = noun + 'α'
                noun_temp['gen_sg'] = put_accent_on_the_ultimate(noun + 'ου')
            if noun in ['δάκρυ']:
                noun_temp['nom_pl'] = noun + 'α'
                noun_temp['gen_sg'] = put_accent_on_the_penultimate(noun + 'ου', true_syllabification=False)
        elif noun[-1] in ['ύ']:
            thema = noun[:-1] + 'έ'
            gen = thema + 'ος'
            plur = thema + 'α'
            if gen in greek_corpus:
                noun_temp['gen_sg'] = gen
            if plur in greek_corpus:
                noun_temp['nom_pl'] = plur

    if not noun_temp['nom_pl'] and not noun_temp['gen_sg']:

        # aklita

        noun_temp['gender'] = 'neut'
        noun_temp['nom_pl'] = noun
        noun_temp['gen_sg'] = noun

        if noun in aklita_gender:
            noun_temp['gender'] = aklita_gender[noun]

    if gender:
        # an explicit gender hint always wins over the guessed one
        noun_temp['gender'] = gender

        if gender == 'fem_pl':
            noun_temp['gender'] = 'fem'
            noun_temp['nom_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = ''
        elif gender == 'masc_pl':
            noun_temp['gender'] = 'masc'
            noun_temp['nom_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = ''

        elif gender == 'neut_pl':
            noun_temp['gender'] = 'neut'
            noun_temp['nom_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = ''
        elif gender == 'fem_sg':
            noun_temp['gender'] = 'fem'
            noun_temp['nom_sg'] = noun
            noun_temp['nom_pl'] = ''
        elif gender == 'masc_sg':
            noun_temp['gender'] = 'masc'
            noun_temp['nom_pl'] = ''
        elif gender == 'neut_sg':
            noun_temp['gender'] = 'neut'
            noun_temp['nom_pl'] = ''

    if noun in irregular_nouns:
        # work on a copy: the steps below assign into noun_temp and must not
        # modify the shared irregular_nouns table
        noun_temp = dict(irregular_nouns[noun])

    if noun in diploklita:
        noun_temp['nom_pl'] = diploklita[noun]
    if inflection == 'aklito':
        noun_temp['nom_sg'] = noun
        if not proper_name:
            noun_temp['nom_pl'] = noun
        else:
            noun_temp['nom_pl'] = ''
        noun_temp['gen_sg'] = noun

    # check one more time these that do not have the flag aklito, but are surmised to be; maybe by removing a prefix
    # we will be able to find out the correct declension type
    if inflection != "aklito" and noun_temp['nom_pl'] == noun_temp['nom_sg']:
        for prefix in prefixes:
            pr_l = len(prefix)
            # the noun must be longer than the prefix, otherwise we would recurse on an empty string
            if noun.startswith(prefix) and len(noun) > pr_l:
                res = create_all_basic_noun_forms(noun[pr_l:])
                new_res = {}
                for key in res.keys():
                    if key != 'gender':
                        new_res[key] = prefix + res[key]
                new_res['gender'] = res['gender']
                noun_temp = new_res
                break

    if capital:
        noun_temp = capitalize_basic_forms(noun_temp)

    return noun_temp
コード例 #8
0
def create_all_pers_forms(conjugation_name,
                          root,
                          active_root=None,
                          deaugmented_root=None,
                          simple_aor=False):
    """
    Build every personal form of a verb for one conjugation table.

    Each ending listed in ``conjugations[conjugation_name]`` is attached to
    ``root``; alternative endings, conjugation-specific accent corrections
    and irregular imperatives are then applied on top of that base set.

    :param conjugation_name: key into ``conjugations``; a falsy value,
        ``'modal'`` or ``'con1_pass_modal'`` short-circuits to ``'modal'``
    :param root: verb root the endings are attached to
    :param active_root: only read for ``'imper_pass_aor_a'``: when it ends in
        σ, ψ or ξ the first 2nd person singular form becomes
        ``active_root + 'ου'``
    :param deaugmented_root: root without augment; only read when
        ``simple_aor`` is set
    :param simple_aor: sigmatic aorist; when set, all forms are rebuilt with
        the accent put on the antepenultimate
    :return: the string ``'modal'``, or a dict
        ``{number: {person: [form, ...]}}``
    """
    forms = {}

    if not conjugation_name or conjugation_name in (
            'modal', 'con1_pass_modal'):
        return 'modal'
    endings = conjugations[conjugation_name]

    # base forms: root + ending; unaccented two-syllable endings get the
    # accent on the antepenultimate of the whole form
    for number in endings:
        forms[number] = {}
        for person in endings[number]:
            forms[number][person] = []
            for ending in endings[number][person]:
                form = root + ending
                if (count_syllables(ending) == 2
                        and ending == remove_all_diacritics(ending)):
                    form = put_accent_on_the_antepenultimate(form)
                forms[number][person].append(form)

    # check if a verb in 2nd conjugation active has alternative endings
    # belonging to other type of the 2nd con
    # NOTE: `endings` is rebound to the alternative table in the two blocks
    # below, so a following `simple_aor` pass iterates that table.
    if conjugation_name in ['con2a_act', 'imper_act_cont_2a']:
        if root + 'είς' in greek_corpus and root + 'εί' in greek_corpus:
            endings = conjugations['con2b_act']
            if conjugation_name == 'imper_act_cont_2a':
                endings = conjugations['imper_act_cont_2b']
            for number in endings:
                for person in endings[number]:
                    for alt_ending in endings[number][person]:
                        forms[number][person].append(root + alt_ending)

    if conjugation_name in ['con2b_act', 'imper_act_cont_2b']:
        if root + 'άς' in greek_corpus and root + 'άει' in greek_corpus:
            endings = conjugations['con2a_act']
            if conjugation_name == 'imper_act_cont_2b':
                endings = conjugations['imper_act_cont_2a']
            for number in endings:
                for person in endings[number]:
                    for alt_ending in endings[number][person]:
                        forms[number][person].append(root + alt_ending)

    if simple_aor:
        # rebuild every slot from scratch with antepenultimate accent
        for number in endings:
            forms[number] = {}
            for person in endings[number]:
                person_forms = forms[number][person] = []
                for ending in endings[number][person]:
                    # the deaugmented root is used for endings longer than
                    # one syllable
                    if deaugmented_root and count_syllables(ending) > 1:
                        form = put_accent_on_the_antepenultimate(
                            deaugmented_root + ending)
                    else:
                        form = put_accent_on_the_antepenultimate(
                            root + ending)
                    person_forms.append(form)

                    if conjugation_name == 'arch_pass_aor' and number == 'sg':
                        person_forms[0] = put_accent_on_the_penultimate(
                            person_forms[0])

                    # add the variant produced with
                    # true_syllabification=False when it differs
                    if form != put_accent_on_the_antepenultimate(
                            form, true_syllabification=False):
                        if deaugmented_root:
                            form = deaugmented_root + ending
                        person_forms.append(
                            put_accent_on_the_antepenultimate(
                                form, true_syllabification=False))

    if conjugation_name in ['con1_pass']:
        forms['pl']['pri'][0] = put_accent_on_the_antepenultimate(
            forms['pl']['pri'][0])
        forms['pl']['sec'][1] = put_accent_on_the_antepenultimate(
            forms['pl']['sec'][1])

    elif conjugation_name in ['parat1_pass']:
        forms['pl']['ter'][0] = put_accent_on_the_antepenultimate(
            forms['pl']['ter'][0])

    elif conjugation_name in ['parat2d_pass', 'parat2b_logia', 'parat2b_pass']:
        # add augment to archaic forms
        for number in forms:
            for person in forms[number]:
                person_forms = forms[number][person]
                # Iterate over a snapshot: the list is extended below, and
                # forms added here must not be augmented a second time.
                for form in list(person_forms):
                    for augmented_form in add_augment(form):
                        if remove_all_diacritics(
                                augmented_form) == augmented_form:
                            augmented_form = put_accent_on_the_antepenultimate(
                                augmented_form)
                        if (augmented_form in greek_corpus
                                and augmented_form not in person_forms):
                            person_forms.append(augmented_form)

    elif conjugation_name in ['con2d_pass']:
        for number in forms:
            for person in forms[number]:
                for index, form in enumerate(forms[number][person]):
                    forms[number][person][
                        index] = put_accent_on_the_antepenultimate(form)

    elif conjugation_name in ['con2b_act', 'con2c_act', 'imper_act_aor_c']:
        # one-syllable forms lose their diacritics
        for number in forms:
            for person in forms[number]:
                for index, form in enumerate(forms[number][person]):
                    if count_syllables(form) == 1:
                        forms[number][person][index] = remove_all_diacritics(
                            form)

    elif conjugation_name in [
            'imper_act_cont_1', 'imper_act_cont_2c', 'imper_act_aor_a',
            'imper_act_aor_b'
    ]:
        forms['sg']['sec'][0] = put_accent_on_the_antepenultimate(
            forms['sg']['sec'][0])

    elif conjugation_name in ['imper_pass_aor_a']:
        if active_root and active_root[-1] in ['σ', 'ψ', 'ξ']:
            forms['sg']['sec'][0] = active_root + 'ου'
        else:
            forms['sg']['sec'][0] = create_imp_pass(root)

    elif conjugation_name in ['imper_act_cont_2a']:
        forms['sg']['sec'][0] = put_accent_on_the_penultimate(
            forms['sg']['sec'][0])
        forms['sg']['sec'][1] = put_accent_on_the_antepenultimate(
            forms['sg']['sec'][1])
        if len(forms['sg']['sec']) == 3:
            forms['sg']['sec'][2] = put_accent_on_the_penultimate(
                forms['sg']['sec'][2])
        # accent: also keep the variant produced with
        # true_syllabification=False when it differs
        if forms['sg']['sec'][0] != put_accent_on_the_penultimate(
                forms['sg']['sec'][0], true_syllabification=False):
            forms['sg']['sec'].append(
                put_accent_on_the_penultimate(forms['sg']['sec'][0],
                                              true_syllabification=False))

    elif conjugation_name in ['con2e_pass']:
        forms['pl']['pri'][0] = put_accent_on_the_antepenultimate(
            forms['pl']['pri'][0])
        forms['pl']['pri'][1] = put_accent_on_the_antepenultimate(
            forms['pl']['pri'][1])
        forms['pl']['sec'][1] = put_accent_on_the_penultimate(
            forms['pl']['sec'][1])

    elif conjugation_name in ['imper_act_aor_ca', 'imper_act_cont_2b']:
        if root == 'ζ':
            forms['sg']['ter'] = ['ζήτω']
        forms['sg']['sec'][0] = put_accent_on_the_penultimate(
            forms['sg']['sec'][0])
        if len(forms['sg']['sec']) == 3:
            forms['sg']['sec'][1] = put_accent_on_the_penultimate(
                forms['sg']['sec'][1])
            forms['sg']['sec'][2] = put_accent_on_the_antepenultimate(
                forms['sg']['sec'][2])

    #### irregular imperatives
    if (conjugation_name.startswith('imper')
            and root in irregular_imperative_forms):
        for number in irregular_imperative_forms[root]:
            for person in irregular_imperative_forms[root][number]:
                irregular_form = irregular_imperative_forms[root][number][
                    person]
                try:
                    person_forms = forms[number][person]
                except KeyError as exc:
                    # best effort: the slot does not exist in this table;
                    # report the exception class and move on
                    print(type(exc))
                    continue
                person_forms.append(irregular_form)
                # in this case check validity of all imperative forms
                forms[number][person] = [
                    form for form in person_forms if form in greek_corpus
                ]
    return forms