def format_gf_lexicon_entry(word, rektion):
    """Print one lexicon line for *word* to standard output.

    The pair returned by ``get_lin(word, rektion)`` supplies the verb type
    and the linearization text; the function name comes from
    ``gfutils.get_funname(word, verbtype)``.  The emitted line has the shape
    ``<word>\\t<funname> = mk<verbtype> <lin> ;``.

    ``rektion`` is only forwarded to ``get_lin``; its meaning is defined
    there.  Nothing is returned.
    """
    verb_kind, linearization = get_lin(word, rektion)
    fun_name = gfutils.get_funname(word, verb_kind)
    entry = '{0}\t{1} = mk{2} {3} ;'.format(
        word, fun_name, verb_kind, linearization)
    print(entry)
def format_gf_lexicon_entry(word, rektion):
    """Write the lexicon entry for *word* to stdout.

    ``get_lin(word, rektion)`` yields a ``(verbtype, lin)`` pair and
    ``gfutils.get_funname(word, verbtype)`` yields the function name.  The
    printed line is the word, a tab, and then
    ``<funname> = mk<verbtype> <lin> ;``.

    The function prints only; it has no return value.
    """
    template = '{0}\t{1} = mk{2} {3} ;'
    category, lin_expr = get_lin(word, rektion)
    name = gfutils.get_funname(word, category)
    print(template.format(word, name, category, lin_expr))
except KeyError: pass return text # leave as is return re.sub("&#?\w+;", fixup, text) def get_forms(word): if word in lemma_to_forms: return lemma_to_forms[word] return '"' + word + '"' def is_illegal(word): return (word in illegal) or not (word in lemma_to_forms) args = gfutils.get_args() lemma_to_forms = gfutils.get_lemma_to_forms(args.forms) line_number = 0 for line in sys.stdin: line_number += 1 line = line.strip() fields = line.split(':') word = unescape(fields[0]) parts = word.split(' ') verb = parts[-1] if re.match('^[a-zõäöüšž]+$', verb) and not is_illegal(verb): funname = gfutils.get_funname(word, 'V') print('{0}\t{1} = mkV "{2}" (mkV {3}) ;'.format(word, funname, ' '.join(parts[0:-1]), get_forms(verb))) else: print("Warning: line " + str(line_number) + ": ignoring: " + word, file=sys.stderr)
args = gfutils.get_args() lemma_to_forms = gfutils.get_lemma_to_forms(args.forms) # The input line can have the Filosoft tag after the '//' sign (which we ignore). # The word itself can contain spaces to denote compound word segment borders. line_number = 0 for line in sys.stdin: line_number += 1 line = line.strip() word = re.sub(' //.*', '', line) word = word.strip() parts = word.split(' ') word = re.sub(' ', '', word) if re.match('^[a-zõäöüšž]+$', word) and not is_illegal(word): funname = gfutils.get_funname(word, args.pos) found_word = False entry = '' for i in range(0, len(parts)): lemma = ''.join(parts[i:]) if lemma in lemma_to_forms: if i == 0: entry = 'mkN %s' % (lemma_to_forms[lemma]) else: # The word is a compound word prefix = ''.join(parts[:i]) # We do not accept single character prefixes. # TODO: These might make sense, depending on how they are coded (ekiri vs e-kiri) # TODO: What about 'apriori'? if len(prefix) == 1: continue
def format_gf_lexicon_entry(word, rektion):
    """Print one lexicon line for *word* to standard output.

    ``get_lin(word, rektion)`` supplies the ``(verbtype, lin)`` pair and
    ``gfutils.get_funname(word, verbtype)`` the function name.  The emitted
    line is ``<word>\\t<funname> = mk<verbtype> <lin> ;``.

    ``rektion`` is only passed through to ``get_lin``.  Nothing is returned.
    """
    verbtype, lin = get_lin(word, rektion)
    funname = gfutils.get_funname(word, verbtype)
    # A single parenthesised argument prints the same text under both the
    # Python 2 print statement and the Python 3 print() function, so this
    # no longer fails with a SyntaxError on Python 3.  The %-formatting is
    # kept as-is so the produced string is unchanged.
    print("%s\t%s = mk%s %s ;" % (word, funname, verbtype, lin))
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Reads one entry per line from standard input and prints, for each line,
# a tab-separated lexicon line of the form:
#
#   <word>\t<funname> = mkAdv "<word>" ;
#
# Everything from the first ' //' onward is dropped from the input line
# before the word is used.
#
# @author Kaarel Kaljurand
# @version 2013-10-01

import sys
import re

import gfutils

for line in sys.stdin:
    line = line.strip()
    # Discard the trailing ' //...' part, then any whitespace left around
    # the word itself.
    word = re.sub(' //.*', '', line)
    word = word.strip()
    funname = gfutils.get_funname(word, 'Adv')
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print() function, so the script runs
    # on both instead of failing with a SyntaxError on Python 3.
    print('%s\t%s = mkAdv "%s" ;' % (word, funname, word))