Example #1
import sys
sys.path.append("common")
import hfconv
import generate_lex_common
import voikkoutils
import xml.dom.minidom
import codecs
from xml.dom import Node

flag_attributes = voikkoutils.readFlagAttributes(
    generate_lex_common.VOCABULARY_DATA + "/flags.txt")

# Get command line options
OPTIONS = generate_lex_common.get_options()

# Inflection class map
CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap)

# No special vocabularies are built for Voikko
generate_lex_common.SPECIAL_VOCABULARY = []

vocabularyFileSuffixes = [
    "ep", "ee", "es", "em", "t", "nl", "l", "n", "h", "p", "a", "s", "c"
]
vocabularyFiles = {}
for fileSuffix in vocabularyFileSuffixes:
    vocFile = codecs.open(
        OPTIONS["destdir"] + "/joukahainen-" + fileSuffix + ".lexc", 'w',
        'UTF-8')
    vocFile.write(
        "! This is automatically generated intermediate lexicon file for\n")
    vocFile.write(
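As context for the loop above: it opens one intermediate LEXC file per vocabulary suffix and writes a generated-file header into each. A minimal sketch of the resulting suffix-to-path mapping, assuming a hypothetical destination directory "build" in place of OPTIONS["destdir"]:

# Sketch only: the same suffix-to-path mapping the loop above builds,
# with "build" standing in for OPTIONS["destdir"].
suffixes = ["ep", "ee", "es", "em", "t", "nl", "l", "n", "h", "p", "a", "s", "c"]
paths = {s: "build/joukahainen-" + s + ".lexc" for s in suffixes}
print(paths["ep"])  # build/joukahainen-ep.lexc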
Example #2
import sys
sys.path.append("common")
import hfconv
import generate_lex_common
import voikkoutils
import xml.dom.minidom
import codecs

flag_attributes = voikkoutils.readFlagAttributes(generate_lex_common.VOCABULARY_DATA + "/flags.txt")

# Get command line options
OPTIONS = generate_lex_common.get_options()

# Inflection class map
CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap)

# No special vocabularies are built for Voikko
generate_lex_common.SPECIAL_VOCABULARY = []

main_vocabulary = generate_lex_common.open_lex(OPTIONS["destdir"], "joukahainen.lex")

def frequency(word):
	fclass = word.getElementsByTagName("fclass")
	if len(fclass) == 0: return 7
	return int(generate_lex_common.tValue(fclass[0]))

# Check the style flags of the word against the current options.
# Returns True if the word is acceptable, otherwise returns False.
def check_style(word):
	global OPTIONS
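A minimal sketch of how frequency() above behaves, using xml.dom.minidom directly; the <word> markup is a hypothetical stand-in for a Joukahainen entry, not taken from the example:

import xml.dom.minidom

# Sketch only: a word element with no <fclass> child is the case where
# frequency() falls back to the default class 7.
word = xml.dom.minidom.parseString("<word><form>talo</form></word>").documentElement
print(len(word.getElementsByTagName("fclass")))  # 0, so frequency(word) would return 7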
Example #3
            (None, u"(..*CO)itUs", u"aivoitus"),
            (None, u"(...*O)tUs", u"jaotus"),
            (None, u"(.*V)s", u"vastaus"),
        ],
    ),
    (u"veranta", u"sw", [(u"nt", u"(.*n)tA", u"veranta")]),
    (
        u"vieras",
        u"ws",
        [(None, u"(.*[lr]iA)s", u"utelias"), (u"k", u"(.*mek)As", u"iäkäs"), (u"k", u"(.*k)As", u"varas")],
    ),
    (u"vihanta", u"sw", [(u"nt", u"(.*n)tA", u"vihanta")]),
    (u"virkkaa", u"sw", [(u"kk", u"(.*k)kAA", u"virkkaa")]),
]

classmap = hfconv.compileClassmapREs(historical)
classmap.extend(hfconv.compileClassmapREs(hfconv.modern_classmap))

pattern = (
    u"^(?P<alku>.*)(?:"
    + u"(?P<keltainen>C[aouyäö]i?nen)|"
    + u"(?P<symboli_ym>[^aeouyäö]o[dfglmnrv]i)|"
    + u"(?P<maineikas>[mntv]eikAs)"
    + u")$"
)

pattern = pattern.replace(u"A", u"[aä]")
pattern = pattern.replace(u"O", u"[oö]")
pattern = pattern.replace(u"U", u"[uy]")
pattern = pattern.replace(u"C", u"[bcdfghjklmnpqrstvwxzšžçðñþß]")
rx = re.compile(pattern, re.IGNORECASE)
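To illustrate the placeholder expansion and named groups used above, a sketch on a reduced pattern (not part of the original example):

import re

# Sketch only: the same A -> [aä] expansion as above, applied to just the
# "maineikas" branch of the pattern.
p = u"^(?P<alku>.*)(?P<maineikas>[mntv]eikAs)$"
p = p.replace(u"A", u"[aä]")
m = re.compile(p, re.IGNORECASE).match(u"maineikas")
print(m.group("alku"), m.group("maineikas"))  # mai neikas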
Example #4
    (u'valmis', u'ws', [(None, u'(.*)is', u'valmis')]),
    (u'vastaus', u'-', [
        (None, u'(lootu)s', u'vastaus'),
        (None, u'(..*CO)itUs', u'aivoitus'),
        (None, u'(...*O)tUs', u'jaotus'),
        (None, u'(.*V)s', u'vastaus'),
    ]),
    (u'veranta', u'sw', [(u'nt', u'(.*n)tA', u'veranta')]),
    (u'vieras', u'ws', [(None, u'(.*[lr]iA)s', u'utelias'),
                        (u'k', u'(.*mek)As', u'iäkäs'),
                        (u'k', u'(.*k)As', u'varas')]),
    (u'vihanta', u'sw', [(u'nt', u'(.*n)tA', u'vihanta')]),
    (u'virkkaa', u'sw', [(u'kk', u'(.*k)kAA', u'virkkaa')])
]

classmap = hfconv.compileClassmapREs(historical)
classmap.extend(hfconv.compileClassmapREs(hfconv.modern_classmap))

pattern = u"^(?P<alku>.*)(?:" + \
   u"(?P<keltainen>C[aouyäö]i?nen)|" + \
   u"(?P<symboli_ym>[^aeouyäö]o[dfglmnrv]i)|" + \
   u"(?P<maineikas>[mntv]eikAs)" + \
          u")$"

pattern = pattern.replace(u"A", u"[aä]")
pattern = pattern.replace(u"O", u"[oö]")
pattern = pattern.replace(u"U", u"[uy]")
pattern = pattern.replace(u"C", u"[bcdfghjklmnpqrstvwxzšžçðñþß]")
rx = re.compile(pattern, re.IGNORECASE)

begin = u"(amerikan|jälleen|tiibetin|uudelleen).+"