Esempio n. 1
0
def _jooutput_xml(req, db, query):
	"""Send the words selected by `query` to `req` as a wordlist XML document.

	The response content type is set to application/xml and the HTTP header
	is sent. The output consists of an XML prologue (DOCTYPE, license comment
	and generation time), one entry per result row written by
	_write_xml_word, and the closing </wordlist> tag.

	req   -- request object; content_type, send_http_header() and write()
	         are used
	db    -- database handle; query(...).getresult() must return the rows
	query -- SQL text embedded as a subquery that provides the columns
	         wid, word and class. It is interpolated into the statement
	         as-is, so it must come from trusted code.
	"""
	req.content_type = "application/xml"
	req.send_http_header()

	generated_at = time.strftime("%Y-%m-%d %H:%M:%S %Z")
	prologue = u'''<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE wordlist SYSTEM "wordlist.dtd">
<!--
  This file is generated by vocabulary management application Joukahainen.
  It contains entries from the vocabulary database of the Voikko project.
  The copyright holders are listed in file CONTRIBUTORS of current Suomi-malaga
  Voikko edition Git repository (or, if you have received this file as a
  part of Suomi-malaga, the file is located at the root directory of the source
  package). For more information, see http://joukahainen.puimula.org

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

  Time of generation: %s
-->
<wordlist xml:lang="fi">
''' % generated_at
	req.write(prologue.encode('UTF-8'))

	# The flag attributes are read keyed by name and then re-keyed by
	# _convertFlagMapKeysToJoukahainenId before being handed to the word writer.
	flags_by_name = voikkoutils.readFlagAttributes(VOIKKO_DATA + "/words/flags.txt")
	flags_by_id = _convertFlagMapKeysToJoukahainenId(flags_by_name)

	# Rows whose wid appears in flag_attribute_value with aid 24 or 26 are
	# left out of the export.
	statement = ("SELECT w.wid, w.word, w.class FROM (%s) w "
	             "WHERE w.wid NOT IN (SELECT f.wid FROM flag_attribute_value f "
	             " WHERE f.aid IN (24, 26)) "
	             "ORDER BY w.wid") % query
	for wid, raw_word, wclass in db.query(statement).getresult():
		# The word column is decoded from UTF-8 before it is written out.
		_write_xml_word(db, req, wid, unicode(raw_word, 'UTF-8'), wclass, flags_by_id)

	req.write("</wordlist>\n")
Esempio n. 2
0
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import sys

sys.path.append("common")
import hfconv
import generate_lex_common
import voikkoutils
import xml.dom.minidom
import codecs
from xml.dom import Node

# Flag attribute definitions, read from flags.txt in the vocabulary data
# directory configured in generate_lex_common.
flag_attributes = voikkoutils.readFlagAttributes(
    generate_lex_common.VOCABULARY_DATA + "/flags.txt")

# Get command line options
OPTIONS = generate_lex_common.get_options()

# Inflection class map, built by hfconv.compileClassmapREs from
# hfconv.modern_classmap
CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap)

# No special vocabularies are built for Voikko
generate_lex_common.SPECIAL_VOCABULARY = []

# Suffixes iterated by the loop that follows.
# NOTE(review): presumably one vocabulary file per suffix - confirm against
# the loop body.
vocabularyFileSuffixes = [
    "ep", "ee", "es", "em", "t", "nl", "l", "n", "h", "p", "a", "s", "c"
]
# Starts empty; presumably filled per suffix by the loop that follows.
vocabularyFiles = {}
for fileSuffix in vocabularyFileSuffixes:
Esempio n. 3
0
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import sys
sys.path.append("common")
import hfconv
import generate_lex_common
import voikkoutils
import xml.dom.minidom
import codecs

# Flag attribute definitions, read from flags.txt in the vocabulary data
# directory configured in generate_lex_common.
flag_attributes = voikkoutils.readFlagAttributes(generate_lex_common.VOCABULARY_DATA + "/flags.txt")

# Get command line options
OPTIONS = generate_lex_common.get_options()

# Inflection class map, built by hfconv.compileClassmapREs from
# hfconv.modern_classmap
CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap)

# No special vocabularies are built for Voikko
generate_lex_common.SPECIAL_VOCABULARY = []

# Lexicon file "joukahainen.lex", opened through generate_lex_common.open_lex
# using the "destdir" command line option.
# NOTE(review): presumably the main output file - confirm against open_lex.
main_vocabulary = generate_lex_common.open_lex(OPTIONS["destdir"], "joukahainen.lex")

def frequency(word):
	fclass = word.getElementsByTagName("fclass")
	if len(fclass) == 0: return 7
Esempio n. 4
0
# Returns flag names from given group for word in Joukahainen
def get_flags_from_group(word, groupName):
    """Collect the text of every <flag> element inside the named group.

    word      -- DOM element whose direct children are searched for
                 elements with tag name `groupName`
    groupName -- tag name of the group element(s) to read flags from

    Returns a list with the `wholeText` of the first child of each <flag>
    element that is a direct child of a matching group, in document order.
    Non-element nodes (for example whitespace text) are ignored at both
    levels.
    """
    return [
        flag_node.firstChild.wholeText
        for group_node in word.childNodes
        if group_node.nodeType == Node.ELEMENT_NODE
        and group_node.tagName == groupName
        for flag_node in group_node.childNodes
        if flag_node.nodeType == Node.ELEMENT_NODE
        and flag_node.tagName == "flag"
    ]


# Flag attribute definitions, read from flags.txt under VOCABULARY_DATA.
flag_attributes = voikkoutils.readFlagAttributes(VOCABULARY_DATA +
                                                 "/flags.txt")


def vowel_type(group):
    """Return the voikkoutils vowel constant described by `group`.

    group -- DOM element searched for "vtype" descendant elements

    Unless exactly one "vtype" element is found, VOWEL_DEFAULT is returned.
    Otherwise the element's text value (via tValue) decides: 'a' gives
    VOWEL_BACK, a-umlaut gives VOWEL_FRONT and anything else VOWEL_BOTH.
    """
    vtype_elements = group.getElementsByTagName("vtype")
    if len(vtype_elements) != 1:
        return voikkoutils.VOWEL_DEFAULT

    vtype_text = tValue(vtype_elements[0])
    if vtype_text == 'a':
        return voikkoutils.VOWEL_BACK
    if vtype_text == 'ä':
        return voikkoutils.VOWEL_FRONT
    return voikkoutils.VOWEL_BOTH


def has_flag(word, flag):
    """Return True if `flag` is among the "flag" values of `word`, else False.

    The values are obtained with tValues(word, "flag").
    """
    return flag in tValues(word, "flag")
Esempio n. 5
0
		for flag in group.childNodes:
			if flag.nodeType != Node.ELEMENT_NODE:
				continue
			if flag.tagName != "flag":
				continue
			flagAttribute = flag_attributes[group.tagName + u"/" + tValue(flag)]
			if flagAttribute.malagaFlag != None:
				malagaFlags.append(flagAttribute.malagaFlag)
	if len(malagaFlags) == 0: return u""
	flag_string = u", tiedot: <"
	for flag in malagaFlags:
		flag_string = flag_string + flag + u","
	flag_string = flag_string[:-1] + u">"
	return flag_string

# Flag attribute definitions, read from flags.txt under VOCABULARY_DATA.
flag_attributes = voikkoutils.readFlagAttributes(VOCABULARY_DATA + u"/flags.txt")

def vowel_type(group):
	"""Return the voikkoutils vowel constant described by `group`.

	group -- DOM element searched for "vtype" descendant elements

	Unless exactly one "vtype" element is found, VOWEL_DEFAULT is returned.
	Otherwise the element's text value (via tValue) decides: u'a' gives
	VOWEL_BACK, a-umlaut gives VOWEL_FRONT and anything else VOWEL_BOTH.
	"""
	vtype_elements = group.getElementsByTagName("vtype")
	if len(vtype_elements) != 1:
		return voikkoutils.VOWEL_DEFAULT

	vtype_text = tValue(vtype_elements[0])
	if vtype_text == u'a':
		return voikkoutils.VOWEL_BACK
	if vtype_text == u'ä':
		return voikkoutils.VOWEL_FRONT
	return voikkoutils.VOWEL_BOTH

def has_flag(word, flag):
	"""Return True if `flag` is among the "flag" values of `word`, else False.

	The values are obtained with tValues(word, "flag").
	"""
	return flag in tValues(word, "flag")

# Returns tuple (alku, jatko) for given word in Joukahainen