def _jooutput_xml(req, db, query):
    """Write the words selected by `query` to `req` as an XML word list.

    The response content type is set to "application/xml" and the HTTP
    header is sent before anything is written. The output starts with an
    XML prolog and a licence comment carrying the generation time, is
    followed by one entry per selected word (produced by _write_xml_word)
    and ends with the closing </wordlist> tag.

    `query` is interpolated into the SQL statement as a subquery that must
    provide the columns wid, word and class. Words having a row in
    flag_attribute_value with aid 24 or 26 are left out of the result.
    """
    req.content_type = "application/xml"
    req.send_http_header()

    # Adjacent literals are concatenated at compile time; every piece ends
    # with the single space that separates it from the next one.
    header_template = (
        u'<?xml version="1.0" encoding="UTF-8" ?> '
        u'<!DOCTYPE wordlist SYSTEM "wordlist.dtd"> '
        u'<!-- '
        u'This file is generated by vocabulary management application '
        u'Joukahainen. It contains entries from the vocabulary database '
        u'of the Voikko project. The copyright holders are listed in '
        u'file CONTRIBUTORS of current Suomi-malaga Voikko edition Git '
        u'repository (or, if you have received this file as a part of '
        u'Suomi-malaga, the file is located at the root directory of the '
        u'source package). '
        u'For more information, see http://joukahainen.puimula.org '
        u'This program is free software; you can redistribute it and/or '
        u'modify it under the terms of the GNU General Public License as '
        u'published by the Free Software Foundation; either version 2 of '
        u'the License, or (at your option) any later version. '
        u'This program is distributed in the hope that it will be useful, '
        u'but WITHOUT ANY WARRANTY; without even the implied warranty of '
        u'MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the '
        u'GNU General Public License for more details. '
        u'You should have received a copy of the GNU General Public '
        u'License along with this program; if not, write to the Free '
        u'Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, '
        u'MA 02110-1301 USA '
        u'Time of generation: %s '
        u'--> '
        u'<wordlist xml:lang="fi"> '
    )
    generated_at = time.strftime("%Y-%m-%d %H:%M:%S %Z")
    req.write((header_template % generated_at).encode('UTF-8'))

    # The flag map is keyed by flag name in the data file; the word writer
    # receives the variant converted by _convertFlagMapKeysToJoukahainenId.
    flags_by_name = voikkoutils.readFlagAttributes(VOIKKO_DATA + "/words/flags.txt")
    flag_map = _convertFlagMapKeysToJoukahainenId(flags_by_name)

    # NOTE(review): `query` is pasted into the statement verbatim, so it
    # presumably has to come from trusted code - confirm against callers.
    statement = (
        "SELECT w.wid, w.word, w.class FROM (%s) w "
        "WHERE w.wid NOT IN (SELECT f.wid FROM flag_attribute_value f "
        " WHERE f.aid IN (24, 26)) "
        "ORDER BY w.wid"
    ) % query
    rows = db.query(statement)
    for row in rows.getresult():
        word_id = row[0]
        # The database hands the word back as UTF-8 encoded bytes.
        word_text = unicode(row[1], 'UTF-8')
        word_class = row[2]
        _write_xml_word(db, req, word_id, word_text, word_class, flag_map)

    req.write("</wordlist>\n")
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA import sys sys.path.append("common") import hfconv import generate_lex_common import voikkoutils import xml.dom.minidom import codecs from xml.dom import Node flag_attributes = voikkoutils.readFlagAttributes( generate_lex_common.VOCABULARY_DATA + "/flags.txt") # Get command line options OPTIONS = generate_lex_common.get_options() # Inflection class map CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap) # No special vocabularies are built for Voikko generate_lex_common.SPECIAL_VOCABULARY = [] vocabularyFileSuffixes = [ "ep", "ee", "es", "em", "t", "nl", "l", "n", "h", "p", "a", "s", "c" ] vocabularyFiles = {} for fileSuffix in vocabularyFileSuffixes:
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA import sys sys.path.append("common") import hfconv import generate_lex_common import voikkoutils import xml.dom.minidom import codecs flag_attributes = voikkoutils.readFlagAttributes(generate_lex_common.VOCABULARY_DATA + "/flags.txt") # Get command line options OPTIONS = generate_lex_common.get_options() # Inflection class map CLASSMAP = hfconv.compileClassmapREs(hfconv.modern_classmap) # No special vocabularies are built for Voikko generate_lex_common.SPECIAL_VOCABULARY = [] main_vocabulary = generate_lex_common.open_lex(OPTIONS["destdir"], "joukahainen.lex") def frequency(word): fclass = word.getElementsByTagName("fclass") if len(fclass) == 0: return 7
# Collects the text of every <flag> element found directly under those
# child elements of the given Joukahainen word whose tag name is groupName.
# The result is a list of flag names in document order.
def get_flags_from_group(word, groupName):
    collected = []
    for candidate in word.childNodes:
        if candidate.nodeType == Node.ELEMENT_NODE and candidate.tagName == groupName:
            collected.extend(
                child.firstChild.wholeText
                for child in candidate.childNodes
                if child.nodeType == Node.ELEMENT_NODE and child.tagName == "flag"
            )
    return collected


# Flag attribute definitions, loaded once when the module is imported.
flag_attributes = voikkoutils.readFlagAttributes(VOCABULARY_DATA + "/flags.txt")


# Maps the <vtype> element of the given group to one of the vowel type
# constants of voikkoutils. The default type is returned unless the group
# contains exactly one <vtype> element.
def vowel_type(group):
    vtype_nodes = group.getElementsByTagName("vtype")
    if len(vtype_nodes) != 1:
        return voikkoutils.VOWEL_DEFAULT
    vtype = tValue(vtype_nodes[0])
    if vtype == 'a':
        return voikkoutils.VOWEL_BACK
    if vtype == 'ä':
        return voikkoutils.VOWEL_FRONT
    # Any other value is treated as "both".
    return voikkoutils.VOWEL_BOTH


# Tells whether the given flag is among the "flag" values of the word.
def has_flag(word, flag):
    return flag in tValues(word, "flag")
for flag in group.childNodes: if flag.nodeType != Node.ELEMENT_NODE: continue if flag.tagName != "flag": continue flagAttribute = flag_attributes[group.tagName + u"/" + tValue(flag)] if flagAttribute.malagaFlag != None: malagaFlags.append(flagAttribute.malagaFlag) if len(malagaFlags) == 0: return u"" flag_string = u", tiedot: <" for flag in malagaFlags: flag_string = flag_string + flag + u"," flag_string = flag_string[:-1] + u">" return flag_string flag_attributes = voikkoutils.readFlagAttributes(VOCABULARY_DATA + u"/flags.txt") def vowel_type(group): vtypes = group.getElementsByTagName("vtype") if len(vtypes) != 1: return voikkoutils.VOWEL_DEFAULT else: vtypes = tValue(vtypes[0]) if vtypes == u'a': return voikkoutils.VOWEL_BACK elif vtypes == u'ä': return voikkoutils.VOWEL_FRONT else: return voikkoutils.VOWEL_BOTH def has_flag(word, flag): if flag in tValues(word, "flag"): return True return False # Returns tuple (alku, jatko) for given word in Joukahainen