def put_accent_on_unaccented_forms(forms):
    """
    Put an accent on the penultimate syllable of every unaccented form that
    has more than one syllable.

    :param forms: dict of the shape {number: {case: form}}, modified in place
    :return: the very same dict that was passed in
    """
    # one syllable words are left untouched
    for by_case in forms.values():
        for case, form in by_case.items():
            if where_is_accent(form):
                continue
            if count_syllables(form) > 1:
                by_case[case] = put_accent(form, PENULTIMATE)
    return forms
def put_accent_on_unaccented_forms(forms):
    """
    Put an accent on the penultimate syllable of every unaccented form that
    has more than one syllable.

    :param forms: dict of the shape {number: {gender: {case: form}}},
        modified in place; a falsy value is returned unchanged
    :return: the very same object that was passed in
    """
    if forms:
        for by_gender in forms.values():
            for by_case in by_gender.values():
                for case, form in by_case.items():
                    if where_is_accent(form):
                        continue
                    if count_syllables(form) > 1:
                        by_case[case] = put_accent(form, 'penultimate')
    return forms
def comparative_forms(comp_or_super):
    """
    Decline a comparative or superlative given in its masc nom sg form.

    :param comp_or_super: one form; forms ending in -ων/-ών get a neuter in
        -ον, every other form is treated as ending in -ος
    :return: all forms in a dict, as produced by create_all_adj_forms
    """
    if comp_or_super.endswith(('ών', 'ων')):
        # masc and fem are identical, the neuter keeps the accent position
        accent_position = where_is_accent(comp_or_super)
        neut = put_accent(comp_or_super[:-2] + 'ον', accent_position)
        basic_forms = f'{comp_or_super}/{comp_or_super}/{neut}'
    else:
        # -ος / -η / -ο
        basic_forms = f'{comp_or_super}/{comp_or_super[:-2]}η/{comp_or_super[:-1]}'

    all_forms, _ = create_all_adj_forms(basic_forms)
    return all_forms
def create_all_pron_forms(bas_forms, strong=True):
    """
    Build the declension table of a pronoun from its basic forms.

    :param bas_forms: basic forms in all genders, as one 'masc/fem/neut' string
    :param strong: if false, creates weak pronouns (only consulted for
        'αυτός', 'εγώ' and 'εσύ')
    :return: dict with all forms, keyed by number, then gender, then case;
        alternative forms are joined with a comma and vocatives are blanked
    :raises ValueError: if masc and neut are identical and the pronoun is not
        one of the known irregular/indeclinable ones
    """
    masc, fem, neut = bas_forms.split('/')
    forms = None

    if masc != neut:
        if masc[-2:] in ['ός', 'ος'] or masc[-3:] == 'πας':
            # declined like an adjective
            forms, _ = create_all_adj_forms(bas_forms)

            # acc sg in -ο also has an alternative with final -ν
            acc_masc_sg = forms[SG][MASC][ACC]
            acc_fem_sg = forms[SG][FEM][ACC]
            if acc_masc_sg[-1] in ['ο', 'ό']:
                forms[SG][MASC][ACC] = acc_masc_sg + ',' + acc_masc_sg + 'ν'
                forms[SG][FEM][ACC] = acc_fem_sg + ',' + acc_fem_sg + 'ν'

            if masc == 'αυτός':
                # the whole table comes from the resources
                if strong:
                    from ..resources import AUTOS_STRONG
                    forms = AUTOS_STRONG
                else:
                    from ..resources import AUTOS_WEAK
                    forms = AUTOS_WEAK

            if masc == 'ποιος':
                # irregular genitives
                gen_fem_sg = 'ποιας,ποιανάς,τίνος'
                gen_neut_sg = gen_masc_sg = 'ποιου,ποιανού,τίνος'
                gen_pl = 'ποιων,ποιανών,τίνων'
                forms[SG][MASC][GEN] = gen_masc_sg
                forms[SG][FEM][GEN] = gen_fem_sg
                forms[SG][NEUT][GEN] = gen_neut_sg
                forms[PL][MASC][GEN] = gen_pl
                forms[PL][FEM][GEN] = gen_pl
                forms[PL][NEUT][GEN] = gen_pl

        elif masc[-6:] == 'δήποτε':
            # decline the pronoun without the suffix, then glue the suffix
            # back onto every single form
            suffix = 'δήποτε'
            bas_forms = bas_forms.replace('σδήποτε', 'ς')
            bas_forms = bas_forms.replace('δήποτε', '')
            forms = create_all_pron_forms(bas_forms)
            for number in forms:
                for gender in forms[number]:
                    for case in forms[number][gender]:
                        form = forms[number][gender][case]
                        forms_for_case = []
                        for s_f in form.split(','):
                            if s_f:
                                # final sigma becomes medial before the suffix
                                if s_f[-1] == 'ς':
                                    s_f = s_f[:-1] + 'σ'
                                forms_for_case.append(s_f + suffix)
                        forms[number][gender][case] = ','.join(forms_for_case)

        # ενας, μια, ενα
        elif masc[-4:] == 'ένας' or masc[-3:] == 'είς':
            forms = {'sg': {'masc': {}, 'fem': {}, 'neut': {}}}
            if masc[-4:] == 'ένας':
                prefix_mn = masc[:-4]
            else:
                prefix_mn = masc[:-3]

            forms['sg']['masc']['nom'] = prefix_mn + 'ένας' + ',' + prefix_mn + 'είς'
            if masc in ['κανείς', 'κανένας']:
                forms['sg']['masc']['nom'] = 'κανείς,κανένας,' + 'κάνας'
            forms['sg']['masc']['acc'] = prefix_mn + 'ένα' + ',' + prefix_mn + 'έναν'
            forms['sg']['masc']['gen'] = prefix_mn + 'ενός'

            # fem may hold several comma separated alternatives
            forms['sg']['fem']['nom'] = fem
            forms['sg']['fem']['acc'] = ','.join(
                [sf + ',' + sf + 'ν' for sf in fem.split(',')])
            forms['sg']['fem']['gen'] = ','.join(
                [sf + 'ς' for sf in fem.split(',')])

            forms['sg']['neut']['nom'] = prefix_mn + 'ένα'
            forms['sg']['neut']['acc'] = prefix_mn + 'ένα'
            forms['sg']['neut']['gen'] = prefix_mn + 'ενός'

        elif masc == 'τις':
            from ..resources import TIS
            forms = TIS

        elif masc == 'όστις':
            from ..resources import OSTIS
            forms = OSTIS

        elif masc == 'όσπερ':
            from ..resources import OSPER
            forms = OSPER

        elif masc[-2:] in ['οι', 'οί']:
            # plural only pronouns
            forms = {'pl': {'masc': {}, 'fem': {}, 'neut': {}}}
            thema = masc[:-2]
            accent = where_is_accent(masc)

            forms[PL][MASC][NOM] = masc
            forms[PL][MASC][GEN] = put_accent(thema + 'ων', accent)
            forms[PL][MASC][ACC] = put_accent(thema + 'ους', accent)
            forms[PL][MASC][VOC] = masc

            forms[PL][FEM][NOM] = fem
            forms[PL][FEM][GEN] = put_accent(thema + 'ων', accent)
            forms[PL][FEM][ACC] = fem
            forms[PL][FEM][VOC] = fem

            forms[PL][NEUT][NOM] = neut
            forms[PL][NEUT][GEN] = put_accent(thema + 'ων', accent)
            forms[PL][NEUT][ACC] = neut
            forms[PL][NEUT][VOC] = neut
        # NOTE(review): if none of the branches above matched, forms is still
        # None here and the vocative loop below fails with a TypeError

    else:
        if masc in [
                'καθετί', 'τι', 'κατιτί', 'τίποτα', 'τίποτε', 'οτιδήποτε',
                'ίντα', 'ό,τι'
        ]:
            # only neuter nominative and accusative are kept
            forms, _ = create_all_adj_forms(bas_forms)
            for number in forms:
                for gender in forms[number]:
                    for case in forms[number][gender]:
                        if gender != NEUT or case not in [NOM, ACC]:
                            forms[number][gender][case] = ''

        elif masc == 'εγώ':
            if strong:
                from ..resources import EGO_STRONG
                forms = EGO_STRONG
            else:
                from ..resources import EGO_WEAK
                forms = EGO_WEAK

        elif masc == 'εσύ':
            if strong:
                from ..resources import ESU_STRONG
                forms = ESU_STRONG
            else:
                from ..resources import ESU_WEAK
                forms = ESU_WEAK

        elif masc == 'αλλήλων':
            forms = {
                'pl': {
                    'masc': {'gen': 'αλλήλων', 'acc': 'αλλήλους'},
                    'fem': {'gen': 'αλλήλων', 'acc': 'αλλήλες'},
                }
            }

        elif masc in ['όπερ', 'τουθόπερ']:
            forms = {'sg': {'neut': {'nom': masc, 'acc': masc}}}
            return forms

        elif masc == 'ταύτα':
            forms = {'pl': {'neut': {'nom': masc, 'acc': masc}}}
            return forms

        elif masc == 'εαυτός':
            # 'sg' and 'pl' are sibling keys, like in every other table built
            # here; the genitive plural is 'εαυτών'
            forms = {
                'sg': {
                    'masc': {
                        'nom': masc,
                        'acc': 'εαυτό,εαυτόν',
                        'gen': 'εαυτού'
                    }
                },
                'pl': {
                    'masc': {
                        'nom': 'εαυτοί',
                        'acc': 'εαυτούς',
                        'gen': 'εαυτών'
                    }
                }
            }
            return forms

        else:
            raise ValueError

    # remove vocatives
    for number in forms:
        for gender in forms[number]:
            for case in forms[number][gender]:
                if case == VOC:
                    forms[number][gender][case] = ''

    return forms
def create_basic_forms(pron):
    """
    Create the basic forms of a pronoun in all three genders.

    :param pron: pronoun in nom sg masc, if declination applies
    :return: 'masc/fem/neut' string as for adjectives (alternatives separated
        with a comma), or None for entries that are not lemmata on their own
        (stray feminine forms and a few listed words such as 'τίνος')
    """
    # The exclusion of 'τίνος' has to apply to the whole "-ος / -πας" test.
    # Without the parentheses `and` binds tighter than `or`, so 'τίνος'
    # (which ends in -ος) was still treated as an adjective-like pronoun.
    if (pron[-2:] in ['ος', 'ός'] or pron[-3:] == 'πας') and pron != 'τίνος':
        # like poios
        bas_forms = adjective.create_all_basic_adj_forms(pron)['adj']

    elif pron[-2:] in ['οι', 'οί']:
        # plural only pronouns
        accent = where_is_accent(pron)
        fem = put_accent(pron[:-2] + 'ες', accent, true_syllabification=False)
        neut = put_accent(pron[:-2] + 'α', accent, true_syllabification=False)
        bas_forms = pron + '/' + fem + '/' + neut

    elif 'δήποτε' in pron:
        suffix = 'δήποτε'
        pron_r = pron[:-6]
        if pron_r[-2:] == 'οσ':
            # decline the part before the suffix like an adjective on -ος
            bas_forms_r = adjective.create_all_basic_adj_forms(pron_r[:-1] + 'ς')['adj']
            fem = bas_forms_r.split('/')[1]
            neut = bas_forms_r.split('/')[2]
            bas_forms = pron + '/' + fem + suffix + '/' + neut + suffix
        else:
            bas_forms = pron + '/' + pron + '/' + pron

    elif pron[-4:] == 'ένας' or pron[-3:] == 'είς':
        # all the pron like kathenas
        masc_length = 3 if pron[-3:] == 'είς' else 4
        masc = pron
        fem = pron[:-masc_length] + 'εμία'
        if len(pron) > 4 and pron[-(masc_length + 1)] == 'ν':
            fem = pron[:-(masc_length + 1)] + 'μία'
        if pron == 'ένας':
            fem = 'μία'
        neut = pron[:-masc_length] + 'ένα'
        # the feminine also has a variant accented on the ultimate
        fem = fem + ',' + put_accent_on_the_ultimate(fem)
        bas_forms = masc + '/' + fem + '/' + neut

    elif pron == 'τις':
        bas_forms = 'τις/τις/τι'

    elif pron == 'όστις':
        bas_forms = 'όστις/ήτις/ότι'

    elif pron == 'όσπερ':
        bas_forms = 'όσπερ/ήπερ/όπερ'

    elif pron[-1] in ['η', 'ὴ'] or pron in [
            'μηδεμία', 'μερικοί', 'μου', 'πάσα', 'παν', 'όσο', 'τίνος'
    ]:
        # there are some random feminine forms in the list, they should be
        # filtered out
        return None

    else:
        # indeclinable: the same form for all genders
        bas_forms = pron + '/' + pron + '/' + pron

    return bas_forms
def create_all_basic_adj_forms(adj, inflection=None):
    """
    Create the basic forms of an adjective, its comparatives and its adverbs.

    :param inflection: if relevant, add 'aklito' flag if it is certain that
        the adjective does not have any declination (like μωβ)
    :param adj: masc nom sg form (`ωραίος`)
    :return: dictionary with keys:
        'adj': masc, fem, neut forms as a string divided with /
            ('ωραίος/ωραία/ωραίο'); alternatives are added and separated with
            a comma
        'comparative': if exists, in the form
            parathetiko + ',' + alt_parathetiko + '/' + uperthetiko + ',' + alt_uperthetiko,
            given only in masc sing nom
        'adverb': adverb form, alternatives separated with a comma
        'adverb_comparative': if exists,
            adverb_parathetiko + ',' + alt_adverb_parathetiko + '/' +
            adverb_uperthetiko + ',' + alt_adverb_uperthetiko
    :raises NotLegalAdjectiveException: for words on -είς and -ας that cannot
        be recognised as adjectives or participles
    """
    # some lemmata are given in a neuter or feminine form, bring them back to
    # the masculine when the corpus confirms it
    if adj[-2:] == 'ον' and adj + 'τα' in greek_corpus:
        adj = adj[:-2] + 'ων'
    elif adj[-2:] == 'ές' and adj[:-2] + 'ής' in greek_corpus:
        # ['εκκρεμές', 'λυκαυγές', 'αλκαλοειδές']:
        adj = adj[:-2] + 'ής'
    elif adj[-2:] == 'έν' and adj[:-2] + 'είς' in greek_corpus:
        # ['ανακοινωθέν']:
        adj = adj[:-2] + 'είς'
    elif adj[-2:] == 'ού':
        if adj[:-2] + 'άς' in greek_corpus:
            adj = adj[:-2] + 'άς'
        elif put_accent_on_the_penultimate(adj[:-2] + 'ης') in greek_corpus:
            adj = put_accent_on_the_penultimate(adj[:-2] + 'ης')
    elif adj[-1] == 'ί' and adj[:-1] + 'ής' in greek_corpus:
        adj = adj[:-1] + 'ής'

    accent = where_is_accent(adj, true_syllabification=False)

    adj_temp = {'adj': 'masc,fem,neuter', 'comparative': '', 'adverb': '', 'adverb_comparative': ''}

    # most basic case -os
    if adj[-2:] in ['ός', 'ος']:
        masc = adj

        if accent == 'ultimate':
            fem = adj[:-2] + 'ή'
        else:
            fem = adj[:-2] + 'η'

        # after a vowel the feminine is on -a
        if adj[-3] in vowels and count_syllables(adj) <= 2:
            if accent == 'ultimate':
                fem = adj[:-2] + 'ά'
            else:
                fem = adj[:-2] + 'α'
        elif adj[-3] in vowels and count_syllables(adj) > 2 and not is_accented(modern_greek_syllabify(adj)[-3]):
            if accent == 'ultimate':
                fem = adj[:-2] + 'ά'
            else:
                fem = adj[:-2] + 'α'

        if adj[-3] in ['κ', 'θ', 'χ']:
            if accent == 'ultimate':
                fem_alt = adj[:-2] + 'ιά'
            else:
                fem_alt = adj[:-2] + 'ια'

            if fem in greek_corpus and fem_alt in greek_corpus:
                fem = fem + ',' + fem_alt
            elif fem not in greek_corpus and fem_alt in greek_corpus:
                fem = fem_alt
            elif fem in greek_corpus and fem_alt not in greek_corpus:
                fem = fem
            else:
                # for the most part forms on h should be correct, but adj is
                # not very common, so is lacking from db
                # check for -a by looking for genitive on as in db
                if accent == 'ultimate':
                    gen = adj[:-2] + 'άς'
                    beta_fem = adj[:-2] + 'ά'
                else:
                    gen = adj[:-2] + 'ας'
                    beta_fem = adj[:-2] + 'α'
                if gen in greek_corpus:
                    fem = beta_fem
                # if it's lacking from the db, still the best guess is to
                # leave the form on -h

        neuter = adj[:-1]

    elif adj[-2:] in ['ής', 'ης']:
        # first check which type
        stem = adj[:-2]
        if stem + 'ικο' in greek_corpus:
            # type hs, a, iko, here accent is always on the last syllable of
            # the stem
            masc = adj
            fem = stem + 'α'
            if stem + 'ισσα' in greek_corpus:
                fem = stem + 'ισσα'
            neuter = stem + 'ικο'

        # NOTE(review): 'ὶ' below carries a varia, not a tonos ('ί') —
        # confirm that this is what the corpus contains
        elif where_is_accent(adj) == 'ultimate' and (stem + 'ὶ' in greek_corpus or stem + 'ιά' in greek_corpus):
            # type, hs, ia, i, mostly colors
            masc = adj
            fem = put_accent(stem + 'ια', accent)
            neuter = put_accent(stem + 'ι', accent)

        elif put_accent(stem + 'ους', accent, true_syllabification=False) in greek_corpus:
            # type hs, hs, es
            masc, fem = adj, adj
            neuter = put_accent(stem + 'ες', accent, true_syllabification=False)
            if accent != 'ultimate' and neuter not in greek_corpus:
                neuter = put_accent(stem + 'ες', 'antepenultimate', true_syllabification=False)

        elif stem + 'ού' in greek_corpus:
            # type kafetzhs kafetzou, but is it a adj?
            masc = adj
            fem = adj[:-2] + 'ού'
            neuter = adj[:-1] + 'δικο'

        else:
            # In cases where my corpus cannot help me, I will surmise that
            # it's hs, a (or issa), iko
            if accent == 'penultimate':
                if adj.endswith('ώδης'):
                    masc, fem = adj, adj
                    neuter = stem + 'ες'
                else:
                    masc = adj
                    fem = stem + 'α'
                    if stem + 'ισσα' in greek_corpus:
                        fem = stem + 'ισσα'
                    neuter = stem + 'ικο'
            elif accent == 'ultimate':
                masc, fem = adj, adj
                neuter = stem + 'ές'

    elif adj[-3:] == 'ους':
        masc, fem = adj, adj
        neuter = adj[:-1] + 'ν'

    elif adj[-2:] in ['υς', 'ύς'] or adj in ['γλυκύ']:
        # my database is unfortunately not that great...
        stem = adj[:-2]
        masc = adj
        neuter = adj[:-1]
        if adj in ['γλυκύ']:
            # unfortunately there are some mistakes in my word list wherever
            # forms are given as lemma and so I have to correct them in this
            # way
            stem = adj[:-1]
            masc = adj + 'ς'
            neuter = adj

        fem = stem + 'ιά'
        if fem + 'ς' not in greek_corpus:
            # look for gen because nom fem can be mistaken for acc pl
            fem_eia = stem + 'εία'
            if fem_eia in greek_corpus:
                fem = fem_eia

        if adj[-5:] == 'πολύς':
            fem = adj[:-5] + 'πολλή'

    elif adj[-2:] in ['ων', 'ών']:
        stem = adj[:-2]
        masc = adj
        fem = None
        neuter = None
        if accent == 'penultimate' or not accent:
            fem = stem + 'ουσα'
            neuter = stem + 'ον'

        if accent == 'ultimate' or not accent:
            fem = stem + 'ούσα'
            neuter = stem + 'ούν'

            # pick the participle type that is confirmed by the corpus
            neuter_alt_1 = stem + 'ών'
            neuter_alt_2 = stem + 'ούν'
            if neuter + 'τα' in greek_corpus or neuter + 'τες' in greek_corpus:
                fem = stem + 'ούσα'
            elif neuter_alt_1 + 'τα' in greek_corpus or neuter_alt_1 + 'τες' in greek_corpus \
                    or adj in ['ζων', 'κυβερνών', 'επιζών']:
                fem = stem + 'ώσα'
                neuter = neuter_alt_1
            elif neuter_alt_2 + 'τα' in greek_corpus or neuter_alt_2 + 'τες' in greek_corpus \
                    or neuter_alt_2 + 'των' in greek_corpus:
                fem = stem + 'ούσα'
                neuter = neuter_alt_2

            if not accent:
                neuter = remove_all_diacritics(neuter)

        # it is also possible, that there are wn, onos
        if adj[:-2] + 'ονος' in greek_corpus:
            masc, fem = adj, adj
            neuter = adj[:-2] + 'ον'

    elif adj[-3:] == 'είς':
        # passive aorist participles
        if adj[:-3] + 'έντα' not in greek_corpus:
            raise NotLegalAdjectiveException
        masc = adj
        fem = adj[:-1] + 'σα'
        neuter = adj[:-3] + 'έν'

    elif adj[-2:] in ['ας', 'άς']:
        # pas, pasa pan and active aorist participles
        pl_nta = adj[:-1] + 'ντα'
        fem_sa = adj[:-1] + 'σα'
        if count_syllables(adj) == 1:
            pl_nta = put_accent(pl_nta, 'penultimate')
            fem_sa = put_accent(fem_sa, 'penultimate')

        if pl_nta in greek_corpus:
            masc = adj
            fem = fem_sa
            neuter = adj[:-1] + 'ν'
        elif adj in ['μέλας']:
            masc = adj
            fem = adj[:-2] + 'αινα'
            neuter = adj[:-1] + 'ν'
        elif adj == 'μέγας':
            masc = adj
            # the feminine of μέγας is μεγάλη
            fem = 'μεγάλη'
            neuter = 'μέγα'
        elif adj[-4:] == 'ονας':
            masc = adj
            fem = adj[:-4] + 'ων'
            neuter = adj[:-2]
        elif where_is_accent(adj) == 'ultimate':
            masc = adj
            fem = adj[:-2] + 'ού'
            neuter = adj[:-1] + 'δικο'
        else:
            raise NotLegalAdjectiveException

    elif adj in ['προβεβηκώς', 'κεχηνώς', 'τεθνεώς', 'αφεστώς', 'ἐνεστώς']:
        # rare but sometimes ancient perf participle
        masc = adj
        fem = adj[:-1] + 'σα'
        neuter = adj

    elif adj in ['άρρην']:
        # so rare that it can be solved like that
        masc = adj
        fem = adj
        neuter = masc[:-2] + 'εν'

    elif adj in ['περίφροντις', 'φέρελπις', 'άφροντις', 'φιλόπατρις', 'μόνορχις', 'παλίμπαις', 'πολύφροντις',
                 'αρνησίπατρις', 'άπολις', 'άπατρις', 'αφιλόπατρις', 'ενήλιξ', 'πυρρόθριξ', 'δασύθριξ', 'ουλόθριξ',
                 'κεντρόφυξ', 'πυρρόθριξ', 'υπερήλιξ', 'βλαξ', 'ομήλιξ', 'υπερμέτρωψ', 'κεντρόφυξ', 'μεσήλιξ']:
        masc, fem = adj, adj
        neuter = '-'

    elif adj in ['εύχαρις', 'επίχαρις', 'άχαρις']:
        masc, fem = adj, adj
        neuter = adj[:-1]

    elif adj in ['ίλεως']:
        masc, fem = adj, adj
        neuter = adj[:-1] + 'ν'

    else:
        masc, fem, neuter = adj, adj, adj

    if inflection == 'aklito':
        masc, fem, neuter = adj, adj, adj

    adj_forms = [masc, fem, neuter]
    adj_temp['adj'] = '/'.join(adj_forms)

    # παραθετικά (comparatives and superlatives)
    stem = neuter
    if stem[-1] == 'ς':
        # final sigma becomes medial before the ending
        stem = stem[:-1] + 'σ'

    parathetika = None
    alt_parathetiko = None
    uperthetiko = '-'
    alt_uperthetiko = None

    parathetiko = put_accent_on_the_antepenultimate(stem + 'τερος')
    if parathetiko not in greek_corpus:
        parathetiko = None
    else:
        uperthetiko = put_accent_on_the_antepenultimate(parathetiko[:-5] + 'τατος')
        if uperthetiko not in greek_corpus:
            uperthetiko = '-'

    if neuter[-1] in ['ο', 'ό']:
        alt_parathetiko = remove_all_diacritics(neuter[:-1]) + 'ύτερος'
        if alt_parathetiko not in greek_corpus:
            alt_parathetiko = None
        else:
            alt_uperthetiko = put_accent_on_the_antepenultimate(alt_parathetiko[:-5] + 'τατος')
            if alt_uperthetiko not in greek_corpus:
                alt_uperthetiko = '-'

    if parathetiko and alt_parathetiko:
        parathetika = parathetiko + ',' + alt_parathetiko + '/' + uperthetiko + ',' + alt_uperthetiko
    elif parathetiko:
        parathetika = parathetiko + '/' + uperthetiko
    elif alt_parathetiko and alt_uperthetiko:
        parathetika = alt_parathetiko + '/' + alt_uperthetiko

    if neuter in irregular_comparatives.keys():
        parathetiko = irregular_comparatives[neuter].split('/')[0]
        uperthetiko = irregular_comparatives[neuter].split('/')[1]
        alt_parathetiko, alt_uperthetiko = None, None
        parathetika = irregular_comparatives[neuter]

    if parathetika:
        adj_temp['comparative'] = parathetika

    # επιρρήματα (adverbs)
    alt_adv = None
    if neuter[-1] in ['ο', 'ό']:
        accent = where_is_accent(neuter)
        if accent != 'ultimate':
            adverb = neuter[:-1] + 'α'
            alt_adv = put_accent_on_the_penultimate(neuter[:-1] + 'ως', true_syllabification=False)
        else:
            adverb = neuter[:-1] + 'ά'
            alt_adv = neuter[:-1] + 'ώς'

    elif masc[-2:] in ['ής', 'ης'] and neuter[-2:] in ['ές', 'ες']:
        adverb = remove_all_diacritics(neuter[:-2]) + 'ώς'
        if adverb not in greek_corpus and neuter[:-2] + 'ως' in greek_corpus:
            adverb = neuter[:-2] + 'ως'
        alt_adv = neuter[:-2] + 'ά'

    elif neuter[-1] in ['υ', 'ύ'] and masc[-1] == 'ς':
        # it should have the ancient form on ews
        adverb = put_accent_on_the_penultimate(neuter[:-1] + 'εως')
        if adverb not in greek_corpus:
            adverb = adj_forms[1]

    elif neuter[-1] == 'ί':
        # colors
        adverb = put_accent_on_the_ultimate(adj_forms[2] + 'α')

    elif (masc[-2:] in ['ας', 'άς', 'ων', 'ών'] or masc[-3:] in ['εις', 'είς']) \
            and fem[-2:] == 'σα' and neuter[-1] == 'ν':
        # ancient adverbs
        adverb = put_accent_on_the_penultimate(neuter + 'τως')

    else:
        # for aklita
        adverb = neuter

    if neuter in ['λίγο', 'πολύ', 'ήσσον', 'κάλλιον']:
        adverb = neuter

    # special cases
    if neuter in ['μέγα', 'μεγάλο']:
        adverb = 'μέγα'
        alt_adv = 'μεγάλως'
    elif (masc[-4:] == 'ονας' or masc[-2:] == 'ων') and fem[-2:] == 'ων':
        adverb = None
    elif masc in ['άρρην', 'μέλας']:
        adverb = None

    # only adverbs that are attested in the corpus are kept
    epirrimata = [e for e in [adverb, alt_adv] if e and e in greek_corpus]
    epirrimata = ','.join(epirrimata)
    if epirrimata:
        adj_temp['adverb'] = epirrimata

    # comparative epirrimata
    adv_parathetika = None
    adverb_parathetiko = alt_adverb_parathetiko = adverb_uperthetiko = alt_adverb_uperthetiko = ''

    if parathetiko:
        adverb_parathetiko = parathetiko[:-2] + 'α'
    if uperthetiko != '-':
        adverb_uperthetiko = ','.join([yp[:-2] + 'α' for yp in uperthetiko.split(',')])
    else:
        adverb_uperthetiko = '-'

    if alt_parathetiko:
        alt_adverb_parathetiko = alt_parathetiko[:-2] + 'α'
    # '-' is the "no such form" placeholder, it must not be turned into the
    # bogus adverb 'α'
    if alt_uperthetiko and alt_uperthetiko != '-':
        alt_adverb_uperthetiko = alt_uperthetiko[:-2] + 'α'
    else:
        alt_adverb_uperthetiko = '-'

    if parathetiko and alt_parathetiko:
        adv_parathetika = adverb_parathetiko + ',' + alt_adverb_parathetiko + '/' + \
            adverb_uperthetiko + ',' + alt_adverb_uperthetiko
    elif parathetiko:
        adv_parathetika = adverb_parathetiko + '/' + adverb_uperthetiko
    elif alt_parathetiko:
        adv_parathetika = alt_adverb_parathetiko + '/' + alt_adverb_uperthetiko

    if neuter in irregular_comparative_adverbs.keys():
        adv_parathetika = irregular_comparative_adverbs[neuter]

    if adv_parathetika:
        adj_temp['adverb_comparative'] = adv_parathetika

    return adj_temp
def create_all_noun_forms(nom_sg, gen_sg, nom_pl, genders, proper_name=False):
    """
    Create all case forms of a noun from its three basic forms.

    :param nom_sg: nominative singular
    :param gen_sg: genitive singular
    :param nom_pl: nominative plural
    :param genders: 'fem' or 'masc' or 'neut', if more than one, then
        separated with ','
    :param proper_name: flag useful for creation of vocatives in proper names
    :return: dictionary of forms keyed by gender, then number, then case;
        alternative forms are included in the same entry, separated with a
        comma
    :raises ValueError: for a non neuter third declension noun given without
        a nominative plural
    """
    accent = where_is_accent(nom_sg, true_syllabification=False)
    noun_all = {}

    if ',' in nom_pl:
        # irregular plural maybe: a second, neuter plural next to the regular
        # one
        plurals = nom_pl.split(',')
        if (plurals[0][-2:] in ['οι', 'οί'] or not plurals[0]) and plurals[1][-1] in ['α', 'ά', 'ή', 'η']:
            genders = genders + ',neut_irregular'
            nom_pl = plurals[0]
            irregular_nom_pl = plurals[1]

    for gender in genders.split(','):

        if gender == 'neut_irregular':
            # they lack gen pl
            noun_all['neut'] = {}
            noun_all['neut'][PL] = {}
            noun_all['neut'][PL][NOM] = irregular_nom_pl
            noun_all['neut'][PL][ACC] = irregular_nom_pl
            noun_all['neut'][PL][VOC] = irregular_nom_pl
            accent = where_is_accent(irregular_nom_pl)
            gen_pl = irregular_nom_pl[:-1] + 'ων'
            if irregular_nom_pl[-1] == 'η':
                gen_pl = put_accent(gen_pl, ULTIMATE)
            if gen_pl in greek_corpus:
                noun_all['neut'][PL][GEN] = gen_pl
            if irregular_nom_pl == 'χρόνια':
                gen_pl = 'χρόνω,χρόνων,χρονώ,χρονών'
                noun_all['neut'][PL][GEN] = gen_pl

        else:
            # defaults, refined by the declension specific branches below
            noun_all[gender] = {}
            noun_all[gender][SG] = {}
            noun_all[gender][PL] = {}
            noun_all[gender][SG][NOM] = nom_sg
            noun_all[gender][SG][GEN] = gen_sg
            noun_all[gender][SG][VOC] = nom_sg
            noun_all[gender][PL][NOM] = nom_pl
            noun_all[gender][PL][ACC] = nom_pl
            noun_all[gender][PL][VOC] = nom_pl

            if gender in ['fem', 'neut']:
                noun_all[gender][SG][ACC] = nom_sg
                noun_all[gender][PL][ACC] = nom_pl
            elif gender == 'masc' and nom_sg == gen_sg:
                noun_all[gender][SG][ACC] = nom_sg

            if nom_sg[-2:] in ['ος', 'ός'] and gen_sg[-2:] in ['ου', 'ού']:
                noun_all[gender][SG][ACC] = nom_sg[:-1]
                if nom_sg[:-1] + 'ν' in greek_corpus:
                    noun_all[gender][SG][ACC] = nom_sg[:-1] + ',' + nom_sg[:-1] + 'ν'

                masc_voc = put_accent(nom_sg[:-2] + 'ε', accent, true_syllabification=False)
                noun_all[gender][SG][VOC] = masc_voc
                if proper_name and count_syllables(nom_sg) < 3:
                    if accent != ULTIMATE:
                        properN_masc_voc = nom_sg[:-1]
                        noun_all[gender][SG][VOC] = properN_masc_voc
                        # but this rule is not always used (Παύλο και Παύλε)
                        # and sometimes voc on e is still in usage
                        if masc_voc.lower() in greek_corpus:
                            noun_all[gender][SG][VOC] = properN_masc_voc + ',' + masc_voc

                if nom_pl:
                    gens = gen_sg.split(',')
                    accent_pl = where_is_accent(nom_pl, true_syllabification=False)
                    if accent_pl == ANTEPENULTIMATE and (
                            len(gens) > 1
                            or where_is_accent(gen_sg, true_syllabification=False) == PENULTIMATE):
                        gen_pl = put_accent(nom_pl[:-2] + 'ων', PENULTIMATE, true_syllabification=False)
                    else:
                        gen_pl = put_accent(nom_pl[:-2] + 'ων', accent_pl, true_syllabification=False)

                    # one accusative plural per genitive singular alternative
                    acc_pl = [
                        put_accent(g[:-2] + 'ους',
                                   where_is_accent(g, true_syllabification=False),
                                   true_syllabification=False)
                        for g in gens
                    ]
                    acc_pl = ','.join(acc_pl)
                    noun_all[gender][PL][GEN] = gen_pl
                    noun_all[gender][PL][ACC] = acc_pl

            elif nom_sg[-1:] == 'ς' and nom_pl and nom_pl[-2:] in ['ές', 'ες'] and gen_sg and \
                    gen_sg == nom_sg[:-1]:
                g_pl = []
                for n_pl in nom_pl.split(','):
                    pl_accent = where_is_accent(n_pl, true_syllabification=False)
                    # NOTE(review): gen_pl is built from the whole nom_pl
                    # string, not from n_pl — confirm this is intended when
                    # several plurals are given
                    gen_pl = nom_pl[:-2] + 'ων'
                    if count_syllables(nom_sg) == count_syllables(nom_pl) and (
                            nom_sg[-2:] in ['ης', 'ής', 'ας', 'άς']):
                        gen_pl = put_accent(gen_pl, ULTIMATE)
                        if (nom_sg[-2:] == 'ας' and count_syllables(nom_sg) > 2) and nom_sg[-3:] != 'ίας':
                            gen_pl = put_accent(gen_pl, PENULTIMATE, true_syllabification=False)
                        g_pl.append(gen_pl)
                    else:
                        gen_pl = put_accent(gen_pl, pl_accent, true_syllabification=False)
                        g_pl.append(gen_pl)

                voc_on_a = False
                if nom_sg[-3:] in ['τής', 'χης']:
                    voc_a = put_accent(nom_sg[:-2] + 'ά', accent)
                    if voc_a in greek_corpus:
                        voc_on_a = voc_a

                noun_all[gender][SG][ACC] = nom_sg[:-1]
                noun_all[gender][SG][VOC] = nom_sg[:-1]
                if voc_on_a:
                    noun_all[gender][SG][VOC] = voc_on_a
                noun_all[gender][PL][GEN] = ','.join(g_pl)

            elif nom_sg[-1:] in ['α', 'ά', 'ή', 'η'] and gen_sg[-1:] == 'ς' and gender != 'neut':
                noun_all[gender][SG][ACC] = nom_sg
                gen_pl = ''
                if nom_pl:
                    if nom_pl[-2:] in ['ες', 'ές'] and count_syllables(nom_sg) == count_syllables(nom_pl):
                        gen_pl = nom_pl[:-2] + 'ων'
                        gen_pl = put_accent(gen_pl, ULTIMATE)
                        if nom_sg[-3:] in ['ίδα', 'άδα', 'ητα']:
                            gen_pl = put_accent(gen_pl, PENULTIMATE)
                        if gen_pl not in greek_corpus:
                            alt_gen_pl = put_accent(gen_pl, PENULTIMATE)
                            alt_gen_pl_b = put_accent(gen_pl, ANTEPENULTIMATE)
                            if alt_gen_pl in greek_corpus:
                                gen_pl = alt_gen_pl
                            elif alt_gen_pl_b in greek_corpus:
                                # use the form that was actually found in the
                                # corpus
                                gen_pl = alt_gen_pl_b
                            else:
                                gen_pl = ''
                    elif nom_pl[-3:] == 'εις':
                        gen_pl = nom_pl[:-3] + 'εων'
                    else:
                        pl_accent = where_is_accent(nom_pl, true_syllabification=False)
                        gen_pl_alt = nom_pl[:-2] + 'ων'
                        gen_pl_alt = put_accent(gen_pl_alt, pl_accent, true_syllabification=False)
                        if gen_pl_alt in greek_corpus:
                            gen_pl = gen_pl_alt
                noun_all[gender][PL][GEN] = gen_pl

            elif nom_sg[-1:] == 'α' and gender == 'neut':
                noun_all[gender][SG][ACC] = nom_sg
                gen_pl = ''
                if nom_pl:
                    # there can be alternative roots like gala
                    gen_pl = ','.join([
                        put_accent(n_pl[:-1] + 'ων', PENULTIMATE)
                        for n_pl in nom_pl.split(',')
                    ])
                noun_all[gender][PL][GEN] = gen_pl

            elif nom_sg[-1:] in ['ς', 'ν'] and gen_sg != nom_sg and gender == 'neut':
                # to filter out aklita
                noun_all[gender][SG][ACC] = nom_sg
                gen_sg_accent = where_is_accent(gen_sg.split(',')[0])
                if gen_sg_accent == ANTEPENULTIMATE:
                    gen_sg_accent = PENULTIMATE
                gen_pl = put_accent(nom_pl.split(',')[0][:-1] + 'ων', gen_sg_accent)
                if nom_pl[-1] in ['η', 'ή']:
                    gen_pl = put_accent(gen_pl, ULTIMATE)
                if gen_pl not in greek_corpus:
                    gen_pl_alt = put_accent(gen_pl, PENULTIMATE)
                    if gen_pl_alt in greek_corpus:
                        gen_pl = gen_pl_alt
                noun_all[gender][PL][GEN] = gen_pl

            elif nom_sg[-1:] in ['ο', 'ό', 'ί', 'ι', 'ΐ', 'ύ', 'υ'] and gender == 'neut' and nom_sg != gen_sg:
                noun_all[gender][SG][ACC] = nom_sg
                gs_pl = []
                if nom_pl and gen_sg:
                    # one genitive plural per genitive singular alternative
                    for g_sg in gen_sg.split(','):
                        gen_accent = where_is_accent(g_sg, true_syllabification=False)
                        if g_sg[-1] == 'ς' and gen_accent == ANTEPENULTIMATE:
                            gs_pl.append(
                                put_accent(g_sg[:-2] + 'ων', PENULTIMATE, true_syllabification=False))
                        else:
                            gs_pl.append(
                                put_accent(g_sg[:-2] + 'ων', gen_accent, true_syllabification=False))
                noun_all[gender][PL][GEN] = ','.join(gs_pl)

            elif nom_sg[-2:] in ['ού', 'ου'] and gender == 'fem':
                noun_all[gender][SG][ACC] = nom_sg
                gen_pl = ''
                if nom_pl:
                    pl_accent = where_is_accent(nom_pl, true_syllabification=False)
                    gen_pl = put_accent(nom_pl[:-2] + 'ων', pl_accent, true_syllabification=False)
                noun_all[gender][PL][GEN] = gen_pl

            elif nom_sg[-3:] == 'έας' and nom_pl[-3:] == 'είς':
                if gender == 'fem':
                    noun_all[gender][SG][GEN] = gen_sg + ',' + nom_sg[:-2] + 'ως'
                noun_all[gender][SG][ACC] = nom_sg[:-1]
                noun_all[gender][SG][VOC] = nom_sg[:-1]
                gen_pl = ''
                if nom_pl:
                    gen_pl = nom_sg[:-2] + 'ων'
                noun_all[gender][PL][GEN] = gen_pl

            elif nom_sg == nom_pl:
                # aklita
                noun_all[gender][SG][ACC] = nom_sg
                noun_all[gender][PL][GEN] = nom_pl

            elif nom_sg[-1:] == 'ς' and gender != 'neut':
                # special cases:
                noun_all[gender][SG][ACC] = nom_sg[:-1]
                if nom_sg[:-1] + 'ν' in greek_corpus:
                    noun_all[gender][SG][ACC] = nom_sg[:-1] + ',' + nom_sg[:-1] + 'ν'
                noun_all[gender][SG][VOC] = nom_sg[:-1]
                gen_pl = ''
                if nom_pl:
                    gen_pl = nom_pl[:-2] + 'ων'
                    if ',' not in gen_sg:
                        accent_gen_sg = where_is_accent(gen_sg, true_syllabification=False)
                        gen_pl = put_accent(gen_pl, accent_gen_sg, true_syllabification=False)
                    else:
                        accent_nom_pl = where_is_accent(nom_pl, true_syllabification=False)
                        if accent_nom_pl != ANTEPENULTIMATE:
                            gen_pl = put_accent(gen_pl, accent_nom_pl, true_syllabification=False)
                        else:
                            gen_pl = put_accent(gen_pl, PENULTIMATE, true_syllabification=False)
                noun_all[gender][PL][GEN] = gen_pl

                if remove_all_diacritics(nom_pl[-3:]) in ['δες', 'τες']:
                    # only forms attested in the corpus are kept
                    accs = []
                    vocs = [nom_sg]
                    acc_1 = nom_sg[:-1]
                    if acc_1 in greek_corpus:
                        accs.append(acc_1)
                    acc_2 = nom_pl[:-2] + 'α'
                    if acc_2 in greek_corpus:
                        accs.append(acc_2)
                    voc_2 = nom_sg[:-1]
                    if voc_2 in greek_corpus:
                        vocs.append(voc_2)
                    noun_all[gender][SG][ACC] = ','.join(accs)
                    noun_all[gender][SG][VOC] = ','.join(vocs)

                elif nom_sg[-3:] in ['εύς', 'ευς']:
                    noun_all[gender][SG][ACC] = gen_sg[:-2] + 'α'
                    if nom_sg == 'Ζευς':
                        noun_all[gender][SG][ACC] = 'Δία,Διά'
                    noun_all[gender][SG][VOC] = nom_sg[:-1]

                elif nom_sg[-2:] == 'ής' and nom_pl[-3:] == 'είς':
                    noun_all[gender][PL][GEN] = nom_pl[:-3] + 'ών'

            elif (nom_sg[-1:] in ['ρ', 'ν', 'ξ', 'ύ', 'υ']) and (gen_sg[-2:] in ['ος', 'ός']):
                if gender != 'neut':
                    if not nom_pl:
                        print(nom_sg, 'no nom_pl error')
                        raise ValueError
                    noun_all[gender][SG][ACC] = nom_pl[:-2] + 'α'
                    voc_sg = gen_sg[:-2]
                    if gen_sg[-4:] in ['ντος', 'κτος'] or count_syllables(nom_sg) == 1:
                        voc_sg = nom_sg
                    noun_all[gender][SG][VOC] = voc_sg
                    gen_pl = put_accent(nom_pl[:-2] + 'ων', PENULTIMATE)
                    if where_is_accent(gen_sg) == ULTIMATE:
                        gen_pl = put_accent(gen_pl, ULTIMATE)
                    noun_all[gender][PL][GEN] = gen_pl
                else:
                    noun_all[gender][SG][ACC] = nom_sg
                    gen_pl = put_accent(nom_pl[:-1] + 'ων', PENULTIMATE)
                    noun_all[gender][PL][GEN] = gen_pl

            elif not nom_sg and (nom_pl[-2:] in ['ες', 'οι', 'ές', 'οί'] or nom_pl[-1:] in ['α', 'η', 'ά', 'ή']):
                # nouns that exist only in plural
                if nom_pl[-2:] in ['οι', 'οί']:
                    accent = where_is_accent(nom_pl, true_syllabification=False)
                    acc_pl = put_accent(nom_pl[:-2] + 'ους', accent, true_syllabification=False)
                    if accent == ANTEPENULTIMATE:
                        acc_pl_alt = put_accent(acc_pl, PENULTIMATE, true_syllabification=False)
                        if acc_pl in greek_corpus and acc_pl_alt in greek_corpus:
                            acc_pl = acc_pl + ',' + acc_pl_alt
                        elif acc_pl_alt in greek_corpus:
                            acc_pl = acc_pl_alt
                    noun_all[gender][PL][ACC] = acc_pl

                thema = nom_pl[:-2]
                if nom_pl[-1] in ['α', 'η', 'ά', 'ή']:
                    thema = nom_pl[:-1]
                accent = where_is_accent(nom_pl, true_syllabification=False)
                gen_pl = put_accent(thema + 'ων', accent, true_syllabification=False)
                if nom_pl[-1] in ['η', 'ή']:
                    gen_pl = put_accent(gen_pl, ULTIMATE)
                if accent == ANTEPENULTIMATE:
                    gen_pl_alt = put_accent(gen_pl, PENULTIMATE, true_syllabification=False)
                    if gen_pl in greek_corpus and gen_pl_alt in greek_corpus:
                        gen_pl = gen_pl + ',' + gen_pl_alt
                    elif gen_pl not in greek_corpus:
                        gen_pl = gen_pl_alt
                if nom_pl[-2:] in ['ες', 'ές']:
                    if nom_pl[-4:-2] not in ['άδ', 'ήτ']:
                        gen_pl = put_accent(gen_pl, ULTIMATE)
                noun_all[gender][PL][GEN] = gen_pl

            # irregularities
            if nom_sg == 'χρόνος':
                noun_all[gender][PL][GEN] = 'χρόνων,χρονών,χρόνω,χρονώ'

    return noun_all
def create_all_basic_noun_forms(noun, inflection=None, gender=None, proper_name=False):
    """
    Guess the basic forms (nom. sg., gen. sg., nom. pl.) and the gender of a noun.

    The ending of ``noun`` selects a declension branch; candidate forms are built and
    accepted mostly on the basis of their presence in ``greek_corpus``. Nouns for which
    nothing can be established are treated as indeclinable (aklita) neuters.

    :param noun: must be nom sg (non-empty, its first character is inspected for capitalisation)
    :param inflection: It can have value None (then inflection is found automatically),
        or "aklito" (indeclinable)
    :param gender: In case of some nouns, gender should be given, where it cannot be correctly
        guessed on the basis of the ending. The values 'fem_pl', 'masc_pl', 'neut_pl', 'fem_sg',
        'masc_sg' and 'neut_sg' additionally restrict the noun to one number.
    :param proper_name: Proper names behave differently from normal nouns, so if it is known,
        it should be flagged
    :return: dictionary with keys: nom_sg, gen_sg, nom_pl and gender. Alternative forms are
        divided with a comma
    """
    noun_temp = {'nom_sg': noun, 'gen_sg': '', 'nom_pl': '', 'gender': ''}

    number_of_syllables = count_syllables(noun, true_syllabification=False)
    accent = where_is_accent(noun, true_syllabification=False)
    ultimate_accent = accent == ULTIMATE
    capital = noun[0].isupper()
    # from here on all comparisons are made against the lower-cased noun
    noun = noun.lower()
    prefixes = ['νανο', 'μικρο', 'σκατο', 'παλιο']

    # on 'os'
    if noun[-2:] in ['ός', 'ος']:
        stem = noun[:-2]
        plural_form = put_accent(stem + 'οι', accent, true_syllabification=False)
        gen_form = put_accent(stem + 'ου', accent, true_syllabification=False)
        if remove_diaer(plural_form) == put_accent(stem + 'οι', accent):
            plural_form = remove_diaer(plural_form)
        if remove_diaer(gen_form) == put_accent(stem + 'ου', accent):
            gen_form = remove_diaer(gen_form)

        gens_sg = []
        noun_temp['gender'] = 'masc'
        # the problem is that many long words on -os are part of some kind of jargon and do not
        # have any other form declined in the corpus; it is assumed that words above 4 syllables
        # do exist, but only in singular. The same should be the case for neuter long words on -o.
        # Also some proper names in greek_corpus are, as is proper, capitalized.
        if gen_form in greek_corpus or gen_form.capitalize() in greek_corpus \
                or gender == 'masc' or number_of_syllables > 4:
            gens_sg.append(gen_form)

        if accent == ANTEPENULTIMATE:
            gen_form_alt = put_accent(gen_form, PENULTIMATE, true_syllabification=False)
            if gen_form_alt in greek_corpus:
                gens_sg.append(gen_form_alt)

        noun_temp['gen_sg'] = ','.join(gens_sg)

        if plural_form in greek_corpus or plural_form.capitalize() in greek_corpus \
                or number_of_syllables > 4 or gender == 'masc':
            noun_temp['nom_pl'] = plural_form
            if not gens_sg:
                noun_temp['gen_sg'] = gen_form

        if noun in feminine_os:
            noun_temp['gender'] = 'fem'
        if noun in feminine_or_masc or (noun[-5:] == 'λόγος' and number_of_syllables > 3):
            # especially all kinds of professionals
            noun_temp['gender'] = 'fem,masc'

        if not noun_temp['nom_pl'] and not gens_sg or gender == 'neut':
            # maybe it's neuter like lathos
            plural_form = stem + 'η'
            gen_form = stem + 'ους'
            if accent == ULTIMATE:
                plural_form = stem + 'ή'
                gen_form = stem + 'ούς'
            elif accent == ANTEPENULTIMATE:
                plural_form = put_accent_on_the_penultimate(plural_form)
                gen_form = put_accent_on_the_penultimate(gen_form)

            if plural_form in greek_corpus or gen_form in greek_corpus or gender == 'neut':
                noun_temp['nom_pl'] = plural_form
                noun_temp['gen_sg'] = gen_form
                noun_temp['gender'] = 'neut'

            # 'γεγονός' and other perfect participles
            plural_form = noun[:-1] + 'τα'
            gen_form = noun[:-1] + 'τος'
            if plural_form in greek_corpus or gen_form in greek_corpus:
                noun_temp['nom_pl'] = plural_form
                noun_temp['gen_sg'] = gen_form
                noun_temp['gender'] = 'neut'

            # in all other instances they are probably correct masculine words that don't occur
            # in the corpus; still, for a proper name don't add a plural that is not attested
            if not (noun_temp['nom_pl'] or noun_temp['gen_sg']):
                stem = noun[:-2]
                plural_form = stem + 'οι'
                gen_form = put_accent(stem + 'ου', accent, true_syllabification=False)
                noun_temp['gender'] = 'masc'
                if accent == ULTIMATE:
                    plural_form = stem + 'οί'
                    gen_form = stem + 'ού'
                if not proper_name:
                    noun_temp['nom_pl'] = plural_form
                noun_temp['gen_sg'] = gen_form
                noun_temp['gender'] = 'masc'

    elif noun[-1] == 'ς' and \
            ((noun[:-1] + 'δες' in greek_corpus) or
             (put_accent_on_the_antepenultimate(noun[:-1] + 'δες') in greek_corpus)) \
            and noun[-2:] != 'ις':
        # imparisyllaba on des, archaic and modern
        noun_temp['gender'] = 'masc'
        noun_temp['gen_sg'] = noun[:-1]

        plurals = []
        plural_form = noun[:-1] + 'δες'
        # sometimes the accent has to be moved, and sometimes there are alternatives
        plural_form_a = put_accent_on_the_antepenultimate(plural_form)
        plural_form_b = put_accent_on_the_penultimate(plural_form)
        if plural_form in greek_corpus:
            plurals.append(plural_form)
        if plural_form_a in greek_corpus:
            plurals.append(plural_form_a)
        if plural_form_b in greek_corpus:
            plurals.append(plural_form_b)
        # de-duplicate while keeping insertion order, so the output does not depend on
        # string-hash randomisation
        plurals = list(dict.fromkeys(plurals))
        noun_temp['nom_pl'] = ','.join(plurals)

        gen_form = noun[:-1]
        gen_form_a = put_accent_on_the_penultimate(gen_form)
        gen_form_arch = noun[:-1] + 'δος'
        if count_syllables(noun) == 1:
            gen_form_arch = put_accent_on_the_ultimate(gen_form_arch)
        gens = []
        if gen_form in greek_corpus:
            gens.append(gen_form)
        if gen_form_a in greek_corpus:
            gens.append(gen_form_a)
        if gen_form_arch in greek_corpus:
            gens.append(gen_form_arch)
        gens = list(dict.fromkeys(gens))
        noun_temp['gen_sg'] = ','.join(gens)

    elif noun[-2:] in ['ές', 'ες']:
        # they can be either pluralia tantum, or masc on es that do not have a plural in the
        # corpus, or neuter on es, or aklito
        if noun[:-1] in greek_corpus or noun[:-1].capitalize() in greek_corpus:
            # this means it's a gen. of a masc form
            noun_temp['gender'] = 'masc'
            noun_temp['gen_sg'] = noun[:-1]
            nom_pl = noun[:-1] + 'δες'
            if nom_pl not in greek_corpus:
                nom_pl_alt = put_accent(noun[:-2] + 'ηδες', ANTEPENULTIMATE)
                if nom_pl_alt in greek_corpus:
                    nom_pl = nom_pl_alt
            noun_temp['nom_pl'] = nom_pl

        elif put_accent(noun[:-2] + 'ους', accent) in greek_corpus:
            noun_temp['gen_sg'] = put_accent(noun[:-2] + 'ους', accent)
            noun_temp['nom_pl'] = put_accent(noun[:-2] + 'η', accent)
            noun_temp['gender'] = 'neut'

        elif (noun[:-2] + 'ων') in greek_corpus or \
                (remove_all_diacritics(noun[:-2]) + 'ών') in greek_corpus or \
                (noun[:-2] + 'ων').capitalize() in greek_corpus or \
                (remove_all_diacritics(noun[:-2]) + 'ών').capitalize() in greek_corpus or \
                noun in ['προάλλες', 'πρόποδες']:
            # pluralia tantum
            noun_temp['gender'] = 'fem'
            if noun in ['πρόποδες', 'χοιράδες']:
                noun_temp['gender'] = 'masc'
            noun_temp['gen_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['nom_sg'] = ''

        else:
            # should be neuter aklita
            noun_temp['gender'] = 'neut'
            noun_temp['gen_sg'] = noun
            noun_temp['nom_pl'] = noun
            noun_temp['nom_sg'] = noun

    elif noun[-2:] in ['άς', 'ής', 'ας', 'ης']:
        noun_temp['gender'] = 'masc'

        # es
        plural_form_a = noun[:-2] + 'ες'
        gen_form_a = noun[:-1]
        if ultimate_accent:
            plural_form_a = noun[:-2] + 'ές'

        # eas - eis
        plural_form_b = noun[:-3] + 'είς'
        gen_form_b = noun[:-1]

        # hs, eis
        plural_form_ba = noun[:-2] + 'είς'
        gen_form_ba = noun[:-2] + 'ούς'

        # ancient forms
        plural_form_c = noun[:-1] + 'τες'
        plural_form_c_neut = noun[:-1] + 'τα'
        gen_form_c = noun[:-1] + 'τος'
        if not ultimate_accent:
            plural_form_c = put_accent_on_the_antepenultimate(plural_form_c, true_syllabification=False)
            plural_form_c_neut = put_accent_on_the_antepenultimate(plural_form_c_neut,
                                                                   true_syllabification=False)
            gen_form_c = put_accent_on_the_antepenultimate(gen_form_c, true_syllabification=False)

        if plural_form_c in greek_corpus and gen_form_c in greek_corpus:
            nom_pl = plural_form_c
            gen_sg = gen_form_c
            # but it is possible that there is also a more demotic form of gen_sg
            if gen_form_a in greek_corpus:
                gen_sg = gen_form_c + ',' + gen_form_a

        elif (plural_form_b in greek_corpus and gen_form_b in greek_corpus) and noun[-3:] not in ['ίας']:
            # the last condition is to exclude the possibility that it is a false positive
            # because of some same sounding fut aorist forms
            nom_pl = plural_form_b
            gen_sg = gen_form_b

        elif plural_form_ba in greek_corpus and gen_form_ba in greek_corpus:
            nom_pl = plural_form_ba
            gen_sg = gen_form_ba

        elif plural_form_a in greek_corpus:
            nom_pl = plural_form_a
            gen_sg = gen_form_a

        elif plural_form_c_neut in greek_corpus and gen_form_c in greek_corpus:
            nom_pl = plural_form_c_neut
            gen_sg = gen_form_c
            noun_temp['gender'] = 'neut'

        else:
            nom_pl = None
            gen_sg = None

        if nom_pl:
            noun_temp['nom_pl'] = nom_pl
            noun_temp['gen_sg'] = gen_sg
        elif noun[-2:] == 'άς':
            # if the corpus doesn't help, it is more probable that an ending in as is imparisyllabic
            noun_temp['nom_pl'] = noun[:-1] + 'δες'
            noun_temp['gen_sg'] = gen_form_a
        else:
            # there are many professions that are rarely in plural, but which do have gen, and
            # almost all of them create gen by subtracting s
            noun_temp['nom_pl'] = plural_form_a
            if noun[-3:] == 'έας':
                noun_temp['nom_pl'] = plural_form_b
            noun_temp['gen_sg'] = gen_form_a

        # lastly check whether these are professions which can be feminine
        if noun in feminine_or_masc:
            noun_temp['gender'] = 'masc,fem'

        # and again a better test for all -eas: if there is gen sg on ews, the noun certainly has
        # a feminine form. This gen form cannot be added as an alternative, as it occurs only for
        # feminines and has to be added in the create_all function
        fem_gen = noun[:-3] + 'έως'
        if noun[-3:] == 'έας' and fem_gen in greek_corpus:
            noun_temp['gender'] = 'masc,fem'

    elif noun[-3:] in ['εύς', 'ευς']:
        plural_form = noun[:-3] + 'είς'
        gen_form = noun[:-3] + 'έως'
        noun_temp['gender'] = 'masc'
        if plural_form in greek_corpus and gen_form in greek_corpus:
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
        # `noun` has been lower-cased above, so the comparison must use the lower-case spelling
        if noun == 'ζευς':
            noun_temp['gen_sg'] = 'Διός,Δίος'
            noun_temp['nom_pl'] = ''
        if noun in feminine_or_masc:
            noun_temp['gender'] = 'fem,masc'

    elif noun[-2:] in ['ώς', 'ως']:
        noun_temp['gender'] = 'neut'
        plural_form = noun[:-1] + 'τα'
        gen_form = noun[:-1] + 'τος'
        thema_ot = put_accent(noun[:-2] + 'οτ', accent)
        if count_syllables(noun) == 1:
            gen_form = put_accent_on_the_ultimate(gen_form)
            plural_form = put_accent_on_the_antepenultimate(plural_form)

        if plural_form in greek_corpus or gen_form in greek_corpus:
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
        elif thema_ot + 'ος' in greek_corpus:
            # there is a possibility that the stem is on 'οτ'
            noun_temp['gender'] = 'neut'
            gen_form = thema_ot + 'ος'
            plural_form = thema_ot + 'α'
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
        elif put_accent(noun[:-2] + 'ους', accent) in greek_corpus:
            # there are also rare feminines on ως with gen on ους, eg 'αιδώς'
            gen_form = put_accent(noun[:-2] + 'ους', accent)
            noun_temp['gen_sg'] = gen_form
            noun_temp['gender'] = 'fem'
        elif noun == 'άλως':
            # it's kind of an exception
            gen_form = 'άλω'
            noun_temp['gen_sg'] = gen_form
            noun_temp['gender'] = 'fem'

    elif noun[-2:] in ['ις', 'ΐς', 'ίς']:
        noun_temp['gender'] = 'fem'
        plural_form = put_accent_on_the_penultimate(noun[:-2] + 'εις', true_syllabification=False)
        gen_form = noun[:-2] + 'εως'
        if noun == 'μις':
            # special case
            plural_form = noun
            gen_form = noun
        if gen_form in greek_corpus or plural_form in greek_corpus:
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
        else:
            # maybe gen on idos
            gen_form = noun[:-1] + 'δος'
            plural_form = noun[:-1] + 'δες'
            if gen_form in greek_corpus or plural_form in greek_corpus or gender == 'fem':
                noun_temp['nom_pl'] = plural_form
                noun_temp['gen_sg'] = gen_form

    elif noun[-3:] in ['ους', 'ούς']:
        if 'πλους' in noun or 'νους' in noun and noun != 'μπόνους':
            noun_temp['gender'] = 'masc'
            noun_temp['gen_sg'] = noun[:-1]
        elif noun == 'ους':
            # the word for 'ear' has to stand on its own (exact match only)
            noun_temp['gender'] = 'neut'
            noun_temp['gen_sg'] = 'ωτός'
            noun_temp['nom_pl'] = 'ώτα'
        else:
            # aklita
            noun_temp['gender'] = 'neut'
            noun_temp['gen_sg'] = noun
            noun_temp['nom_pl'] = noun

    elif noun[-2:] in ['υς', 'ύς']:
        noun_temp['gender'] = 'fem'
        gen_form = noun[:-1] + 'ος'
        thema = put_accent_on_the_ultimate(noun)
        if count_syllables(noun) == 1:
            gen_form = put_accent_on_the_ultimate(gen_form)
        plur_form = thema[:-1] + 'ες'
        if noun in ['βοτρύς', 'ιχθύς', 'πέλεκυς', 'μυς']:
            noun_temp['gender'] = 'masc'
        if noun == 'πέλεκυς':
            gen_form = 'πελέκεως'
            plur_form = 'πελέκεις'
        noun_temp['gen_sg'] = gen_form
        noun_temp['nom_pl'] = plur_form

    elif noun[-1] in ['α', 'η', 'ά', 'ή']:
        # feminina
        noun_temp['gender'] = 'fem'
        gen_a = noun + 'ς'
        noun_temp['gen_sg'] = gen_a
        plural_form_a = noun[:-1] + 'ες'
        if ultimate_accent:
            plural_form_a = noun[:-1] + 'ές'
        plural_form_b = put_accent_on_the_penultimate(noun[:-1] + 'εις', true_syllabification=False)
        plural_form_c = noun + 'δες'

        if plural_form_c in greek_corpus:
            noun_temp['nom_pl'] = plural_form_c
        elif plural_form_a in greek_corpus and plural_form_a not in ['γες']:
            # unfortunately for some very short words it can fail; the ad hoc solution is to
            # keep some kind of a list
            noun_temp['nom_pl'] = plural_form_a

        # special case for neuter on ma
        if noun[-2:] == 'μα' and (
                plural_form_a not in greek_corpus
                or plural_form_b not in greek_corpus
                or plural_form_c not in greek_corpus
                or gender == 'neut'
                or put_accent_on_the_antepenultimate(noun + 'τα', true_syllabification=False) in greek_corpus):
            plural_form = put_accent_on_the_antepenultimate(noun + 'τα', true_syllabification=False)
            gen_form = put_accent_on_the_antepenultimate(noun + 'τος', true_syllabification=False)
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
            noun_temp['gender'] = 'neut'
        elif noun[-1] == 'α' and noun + 'τος' in greek_corpus and noun + 'τα' in greek_corpus \
                or gender == 'neut':
            # gala, galatos
            noun_temp['nom_sg'] = noun
            noun_temp['nom_pl'] = put_accent_on_the_antepenultimate(noun + 'τα')
            noun_temp['gen_sg'] = put_accent_on_the_antepenultimate(noun + 'τος')
            noun_temp['gender'] = 'neut'
            if 'γάλα' in noun:
                noun_temp['nom_pl'] = noun + 'τα' + ',' + noun + 'κτα'
                noun_temp['gen_sg'] = noun + 'τος' + ',' + noun + 'κτος'

        if (noun[-1] in ['α', 'ά'] and gen_a not in greek_corpus
                and plural_form_a not in greek_corpus
                and put_accent(noun[:-1] + 'ων', accent) in greek_corpus) or noun in plur_tant_neut:
            # maybe pluralia tantum
            noun_temp['nom_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = ''
            noun_temp['gender'] = 'neut'

        if (noun[-2:] in ['ση', 'ξη', 'ψη'] or noun in feminine_h_eis) \
                and put_accent_on_the_ultimate(noun[:-1] + 'ων') not in greek_corpus:
            # it has to be a separate `if`, because the noun can earlier be falsely recognized
            # as having a correct form on es, because of some aorists in sec person sg
            noun_temp['nom_pl'] = plural_form_b
            noun_temp['gen_sg'] = gen_a + ',' + put_accent_on_the_antepenultimate(
                noun[:-1] + 'εως', true_syllabification=False)

    elif noun[-2:] == 'ού':
        noun_temp['gender'] = 'fem'
        noun_temp['gen_sg'] = noun + 'ς'
        plural_form = noun + 'δες'
        if plural_form in greek_corpus:
            noun_temp['nom_pl'] = plural_form

    elif noun[-1] in ['ό', 'ο']:
        if noun[-3:] == 'ιμο':
            plural_form = noun[:-1] + 'ατα'
            gen_form = noun[:-1] + 'ατος'
            plural_form = put_accent_on_the_antepenultimate(plural_form)
            gen_form = put_accent_on_the_penultimate(gen_form)
            if plural_form in greek_corpus or gen_form in greek_corpus:
                noun_temp['nom_pl'] = plural_form
                noun_temp['gen_sg'] = gen_form
                noun_temp['gender'] = 'neut'
                # NOTE: early return - the shared post-processing at the end of the function
                # (explicit gender, irregular nouns, capitalisation) is skipped here
                return noun_temp

        noun_temp['gender'] = 'neut'
        plural_form = noun[:-1] + 'α'
        gen_form = noun[:-1] + 'ου'
        if ultimate_accent:
            plural_form = noun[:-1] + 'ά'
            gen_form = noun[:-1] + 'ού'

        # NOTE(review): the pairing of the `else` below with this `if` was reconstructed from
        # flattened source - confirm against the upstream layout
        if plural_form in greek_corpus or \
                plural_form.capitalize() in greek_corpus or \
                number_of_syllables > 4 or \
                (gender not in ['fem', 'masc'] and inflection != 'aklito'):
            noun_temp['nom_pl'] = plural_form
            gens = []
            if gen_form in greek_corpus or \
                    gen_form.capitalize() in greek_corpus or \
                    number_of_syllables > 4:
                gens.append(gen_form)
            if accent == ANTEPENULTIMATE:
                gen_a = put_accent(gen_form, PENULTIMATE, true_syllabification=False)
                if gen_a in greek_corpus:
                    gens.append(gen_a)
            if gens:
                noun_temp['gen_sg'] = ','.join(gens)
            elif gender not in ['fem', 'masc'] and inflection != 'aklito':
                noun_temp['gen_sg'] = gen_form
        else:
            # in this case we assume they are indeclinable neuters
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun
            noun_temp['gender'] = 'neut'

    elif noun[-1] in ['ι', 'ί', 'ΐ'] and noun[-2:] not in ['οι', 'οί']:
        noun_temp['gender'] = 'neut'
        plural_form = noun + 'α'
        gen_form = put_accent_on_the_ultimate(noun + 'ου')
        if ultimate_accent:
            plural_form = put_accent_on_the_ultimate(plural_form)
        if plural_form[-3] in vowels:
            # a vowel before the final ι: insert γ between the stem and the ending
            plural_form = plural_form[:-2] + 'γι' + plural_form[-1]
            gen_form = gen_form[:-3] + 'γι' + gen_form[-2:]

        # in greek corpus some diminutives (upokoristika) are lacking
        if plural_form in greek_corpus or noun[-3:] in ['άκι', 'ίκι', 'άρι', 'έκι', 'ήρι', 'ίδι', 'ύρι']:
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
        elif inflection != "aklito":
            # if the corpus doesn't help, but we know that it's declinable
            noun_temp['nom_pl'] = plural_form

        if noun_temp['nom_pl'] == '' and noun_temp['gen_sg'] == '':
            # we conclude these are aklita, but there will surely be some uncovered words that
            # do decline; there is no known way to sort them out
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun

    elif noun[-2:] in ['οι', 'οί']:
        # pluralis tantum masc
        noun_temp['gender'] = 'masc'
        noun_temp['nom_pl'] = noun
        noun_temp['nom_sg'] = ''
        noun_temp['gen_sg'] = ''

    # ending n is a bit tricky, so it is worked out separately
    elif noun[-2:] in ['ον', 'όν', 'έν', 'εν', 'άν', 'αν']:
        # neuter nouns with a stem in -ντ; ancient second-declension neuters in -ον are also
        # taken into account
        noun_temp['gender'] = 'neut'
        plural_form = noun + 'τα'
        gen_form = noun + 'τος'

        # ancient words in -ον
        plural_form_a = ''
        gen_form_a = ''
        if noun[-2:] in ['ον', 'όν']:
            plural_form_a = noun[:-2] + 'ά'
            gen_form_a = noun[:-2] + 'ού'
            if not ultimate_accent:
                plural_form_a = noun[:-2] + 'α'
                gen_form_a = put_accent_on_the_penultimate(gen_form_a, true_syllabification=False)

        if not is_accented(noun):
            # monosyllables are accented on the ultima in the genitive
            plural_form = put_accent_on_the_penultimate(plural_form, true_syllabification=False)
            gen_form = put_accent_on_the_ultimate(gen_form)
            if noun == 'ον':
                gen_form = put_accent_on_the_penultimate(gen_form)

        if plural_form in greek_corpus and gen_form in greek_corpus:
            noun_temp['nom_pl'] = plural_form
            noun_temp['gen_sg'] = gen_form
        elif plural_form_a in greek_corpus and gen_form_a in greek_corpus:
            noun_temp['nom_pl'] = plural_form_a
            noun_temp['gen_sg'] = gen_form_a
        else:
            # it is assumed it's a borrowing from french
            if noun in ['ρεσεψιόν', 'σπορτσγούμαν']:
                # there are certainly more
                noun_temp['gender'] = 'fem'
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun

    elif noun[-2:] in ['ων', 'ών']:
        noun_temp['gender'] = 'masc'
        irregular_3 = {'κύων': 'κυν', 'είρων': 'είρων', 'ινδικτιών': 'ινδικτιών'}

        # candidate stems
        stem_a = noun[:-2] + 'όν'
        stem_b = noun[:-2] + 'όντ'
        stem_c = noun[:-2] + 'ούντ'
        stem_d = noun[:-2] + 'ώντ'
        plural_form_a = stem_a + 'ες'
        gen_form_a = stem_a + 'ος'
        plural_form_b = stem_b + 'ες'
        gen_form_b = stem_b + 'ος'
        plural_form_c = stem_c + 'ες'
        gen_form_c = stem_c + 'ος'
        plural_form_d = stem_d + 'ες'
        gen_form_d = stem_d + 'ος'

        ir_stem = irregular_3.get(noun, False)
        if ir_stem:
            ir_pl = ir_stem + 'ες'
            ir_gen = ir_stem + 'ος'
            if count_syllables(ir_stem) == 1 and ir_gen not in greek_corpus:
                ir_pl = put_accent_on_the_antepenultimate(ir_pl)
                ir_gen = put_accent_on_the_antepenultimate(ir_gen)
                if ir_gen not in greek_corpus:
                    ir_gen = put_accent_on_the_ultimate(ir_gen)
            if ir_pl in greek_corpus and ir_gen in greek_corpus:
                noun_temp['nom_pl'] = ir_pl
                noun_temp['gen_sg'] = ir_gen
                # NOTE: early return - the shared post-processing below is skipped
                return noun_temp

        if not ultimate_accent:
            plural_form_a = put_accent_on_the_antepenultimate(plural_form_a, true_syllabification=False)
            gen_form_a = put_accent_on_the_antepenultimate(gen_form_a, true_syllabification=False)
            plural_form_b = put_accent_on_the_antepenultimate(plural_form_b, true_syllabification=False)
            gen_form_b = put_accent_on_the_antepenultimate(gen_form_b, true_syllabification=False)

        if plural_form_a in greek_corpus and gen_form_a in greek_corpus:
            noun_temp['nom_pl'] = plural_form_a
            noun_temp['gen_sg'] = gen_form_a
        elif plural_form_b in greek_corpus and gen_form_b in greek_corpus:
            noun_temp['nom_pl'] = plural_form_b
            noun_temp['gen_sg'] = gen_form_b
        elif plural_form_c in greek_corpus and gen_form_c in greek_corpus:
            noun_temp['nom_pl'] = plural_form_c
            noun_temp['gen_sg'] = gen_form_c
        elif plural_form_d in greek_corpus and gen_form_d in greek_corpus:
            noun_temp['nom_pl'] = plural_form_d
            noun_temp['gen_sg'] = gen_form_d

    elif noun[-1] in ['ξ', 'ψ', 'τ', 'ρ', 'β', 'ν', 'δ', 'ε', 'έ', 'ζ', 'κ', 'λ', 'μ'] and \
            noun not in ['σεξ', 'σερ', 'φαξ', 'μπορ', 'μπαρ', 'μποξ'] and inflection != 'aklito':
        # not very common but existing 3rd declension nouns
        stems = []
        if noun[-1] == 'ξ':
            stems.append(noun[:-1] + 'κ')
            stems.append(noun[:-1] + 'χ')
            stems.append(noun[:-1] + 'κτ')
        elif noun[-1] == 'ψ':
            stems.append(noun[:-1] + 'π')
            stems.append(noun[:-1] + 'φ')
            stems.append(noun[:-1] + 'πτ')
            stems.append(noun[:-1] + 'β')
        elif noun[-1] == 'ρ':
            stems.append(noun)
            stems.append(noun[:-1] + 'τ')
            if noun[-2:] == 'ωρ':
                stems.append(noun[:-2] + 'ορ')
                noun_temp['gender'] = 'masc'
                if 'μήτωρ' in noun:
                    noun_temp['gender'] = 'fem'
            elif noun[-2:] == 'ώρ':
                stems.append(noun[:-2] + 'όρ')
                noun_temp['gender'] = 'masc'
            else:
                noun_temp['gender'] = 'neut'

        for stem in stems:
            plural_form = stem + 'ες'
            modern_form = stem + 'ας'
            plural_form_n = stem + 'α'
            gen_form = stem + 'ος'
            if count_syllables(stem) == 1:
                plural_form = put_accent_on_the_antepenultimate(plural_form)
                plural_form_n = put_accent_on_the_antepenultimate(plural_form_n)
                gen_form = put_accent_on_the_antepenultimate(gen_form)
                if gen_form not in greek_corpus:
                    gen_form = put_accent_on_the_ultimate(gen_form)
            elif where_is_accent(stem) == ANTEPENULTIMATE:
                gen_form = put_accent_on_the_antepenultimate(gen_form)
                plural_form = put_accent_on_the_antepenultimate(plural_form)
                plural_form_n = put_accent_on_the_antepenultimate(plural_form_n)

            if (plural_form in greek_corpus or modern_form in greek_corpus) and noun not in ['πυρ']:
                noun_temp['nom_pl'] = plural_form
                if gen_form in greek_corpus or modern_form in greek_corpus:
                    noun_temp['gen_sg'] = gen_form
                # taking the caller-supplied gender is a crude way to correct the gender, but
                # there is no better one without a comprehensive list
                if gender:
                    noun_temp['gender'] = gender
                # NOTE: early return - the shared post-processing below is skipped
                return noun_temp
            else:
                if plural_form_n in greek_corpus or noun in ['έαρ']:
                    noun_temp['gender'] = 'neut'
                    noun_temp['gen_sg'] = gen_form
                    if noun not in ['έαρ']:
                        noun_temp['nom_pl'] = plural_form_n
                    return noun_temp

        # else it is assumed it's either a borrowing or some other substantivized word
        noun_temp['gender'] = 'neut'
        noun_temp['nom_pl'] = noun
        noun_temp['gen_sg'] = noun
        if noun in ['σπεσιαλιτέ', 'ρεσεψιόν']:
            # there are probably more such cases
            noun_temp['gender'] = 'fem'
        if noun in ['σερ']:
            # probably a lot of proper names should be added here, but that is dealt with by
            # passing the gender explicitly
            noun_temp['gender'] = 'masc'

    elif noun[-1] in ['ώ', 'ω']:
        if noun in ['ηχώ', 'πειθώ', 'φειδώ', 'βάβω']:
            # ancient feminina
            noun_temp['gender'] = 'fem'
            noun_temp['gen_sg'] = noun[:-1] + 'ούς'
            if noun in ['βάβω']:
                noun_temp['gen_sg'] = noun
        elif capital or proper_name:
            # feminine proper name
            noun_temp['gender'] = 'fem'
            noun_temp['gen_sg'] = noun + 'ς'
        else:
            noun_temp['gender'] = 'neut'
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun

    elif noun[-1] in ['υ', 'ύ']:
        # ancient 3 declension, oksy, asty
        noun_temp['gender'] = 'neut'
        if noun[-2:] in ['ου', 'ού']:
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = noun
        elif noun[-1] == 'υ':
            gen_1 = noun + 'ου'
            gen_1b = put_accent_on_the_penultimate(gen_1)
            plural = noun + 'α'
            if gen_1 in greek_corpus:
                noun_temp['gen_sg'] = gen_1
            elif gen_1b in greek_corpus:
                noun_temp['gen_sg'] = gen_1b
            if plural in greek_corpus:
                noun_temp['nom_pl'] = plural

            if noun in ['άστυ', 'δόρυ']:
                noun_temp['nom_pl'] = noun[:-1] + 'η'
                noun_temp['gen_sg'] = noun[:-1] + 'εως'
            if noun in ['βράδυ']:
                noun_temp['nom_pl'] = noun[:-1] + 'ια'
                noun_temp['gen_sg'] = put_accent_on_the_ultimate(noun[:-1] + 'ιου')
            if noun in ['στάχυ', 'δίχτυ']:
                noun_temp['nom_pl'] = noun + 'α'
                noun_temp['gen_sg'] = put_accent_on_the_ultimate(noun + 'ου')
            if noun in ['δάκρυ']:
                noun_temp['nom_pl'] = noun + 'α'
                noun_temp['gen_sg'] = put_accent_on_the_penultimate(noun + 'ου', true_syllabification=False)
        elif noun[-1] in ['ύ']:
            thema = noun[:-1] + 'έ'
            gen = thema + 'ος'
            plur = thema + 'α'
            if gen in greek_corpus:
                noun_temp['gen_sg'] = gen
            if plur in greek_corpus:
                noun_temp['nom_pl'] = plur

    # NOTE(review): this fallback is placed at function level (it also catches nouns that
    # matched no branch above); the nesting was reconstructed from flattened source - confirm
    if not noun_temp['nom_pl'] and not noun_temp['gen_sg']:
        # aklita
        noun_temp['gender'] = 'neut'
        noun_temp['nom_pl'] = noun
        noun_temp['gen_sg'] = noun

    if noun in aklita_gender:
        noun_temp['gender'] = aklita_gender[noun]

    if gender:
        # an explicitly supplied gender wins; the *_pl / *_sg variants also drop one number
        noun_temp['gender'] = gender
        if gender == 'fem_pl':
            noun_temp['gender'] = 'fem'
            noun_temp['nom_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = ''
        elif gender == 'masc_pl':
            noun_temp['gender'] = 'masc'
            noun_temp['nom_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = ''
        elif gender == 'neut_pl':
            noun_temp['gender'] = 'neut'
            noun_temp['nom_sg'] = ''
            noun_temp['nom_pl'] = noun
            noun_temp['gen_sg'] = ''
        elif gender == 'fem_sg':
            noun_temp['gender'] = 'fem'
            noun_temp['nom_sg'] = noun
            noun_temp['nom_pl'] = ''
        elif gender == 'masc_sg':
            noun_temp['gender'] = 'masc'
            noun_temp['nom_pl'] = ''
        elif gender == 'neut_sg':
            noun_temp['gender'] = 'neut'
            noun_temp['nom_pl'] = ''

    if noun in irregular_nouns:
        # work on a copy: the assignments below must not modify the shared lookup table
        noun_temp = dict(irregular_nouns[noun])

    if noun in diploklita:
        noun_temp['nom_pl'] = diploklita[noun]

    if inflection == 'aklito':
        noun_temp['nom_sg'] = noun
        if not proper_name:
            noun_temp['nom_pl'] = noun
        else:
            noun_temp['nom_pl'] = ''
        noun_temp['gen_sg'] = noun

    # check one more time those that do not have the aklito flag but are surmised to be
    # indeclinable: maybe by removing a prefix the correct declension type can be found
    if inflection != "aklito" and noun_temp['nom_pl'] == noun_temp['nom_sg']:
        for prefix in prefixes:
            pr_l = len(prefix)
            # the remainder must be non-empty, otherwise there is nothing to inflect
            if noun.startswith(prefix) and len(noun) > pr_l:
                res = create_all_basic_noun_forms(noun[pr_l:])
                new_res = {}
                for key, value in res.items():
                    if key == 'gender':
                        continue
                    # re-attach the prefix to every comma-separated alternative and keep
                    # missing (empty) forms empty
                    new_res[key] = ','.join(prefix + form for form in value.split(',') if form)
                new_res['gender'] = res['gender']
                noun_temp = new_res
                break

    if capital:
        noun_temp = capitalize_basic_forms(noun_temp)

    return noun_temp