Esempio n. 1
0
def format_gf_lexicon_entry(word, rektion):
    """Print the GF lexicon line for ``word`` to standard output.

    The emitted line has the shape
    ``<word>TAB<funname> = mk<verbtype> <lin> ;`` where the verb type and
    the linearization come from ``get_lin(word, rektion)`` and the function
    name from ``gfutils.get_funname(word, verbtype)``.

    word    -- the lexicon word; also printed as the first column
    rektion -- handed unchanged to ``get_lin``

    Returns nothing; the only effect is the printed line.
    """
    template = '{0}\t{1} = mk{2} {3} ;'
    category, linearization = get_lin(word, rektion)
    gf_name = gfutils.get_funname(word, category)
    entry_line = template.format(word, gf_name, category, linearization)
    print(entry_line)
Esempio n. 2
0
def format_gf_lexicon_entry(word, rektion):
	"""Write one GF lexicon entry for ``word`` to standard output.

	``get_lin(word, rektion)`` supplies a (verb type, linearization) pair and
	``gfutils.get_funname(word, verbtype)`` supplies the function name. The
	printed line is ``<word>TAB<funname> = mk<verbtype> <lin> ;``.

	word    -- the lexicon word; also printed as the first column
	rektion -- forwarded as-is to ``get_lin``
	"""
	lin_info = get_lin(word, rektion)
	kind, linearization = lin_info
	name = gfutils.get_funname(word, kind)
	pieces = (word, name, kind, linearization)
	print('{0}\t{1} = mk{2} {3} ;'.format(*pieces))
Esempio n. 3
0
			except KeyError:
				pass
		return text # leave as is
	return re.sub("&#?\w+;", fixup, text)

def get_forms(word):
	"""Return the entry stored for ``word`` in ``lemma_to_forms``.

	When ``word`` is not a key of ``lemma_to_forms`` the word itself is
	returned, wrapped in double quotes.
	"""
	if word not in lemma_to_forms:
		# Unknown lemma: fall back to the quoted word.
		quoted = '"' + word + '"'
		return quoted
	return lemma_to_forms[word]

def is_illegal(word):
	"""Tell whether ``word`` must be rejected.

	A word is rejected when it is listed in ``illegal`` or when it is not a
	key of ``lemma_to_forms``.
	"""
	if word in illegal:
		return True
	return word not in lemma_to_forms

# Command-line arguments, as returned by gfutils.
args = gfutils.get_args()

# Lemma lookup loaded from args.forms; queried by get_forms() and is_illegal().
lemma_to_forms = gfutils.get_lemma_to_forms(args.forms)

# Only the text before the first ':' of each input line is used. Its last
# space-separated token is taken as the verb; the tokens before it become the
# quoted first argument of the outer mkV.
line_number = 0
for line in sys.stdin:
	line_number += 1
	line = line.strip()
	word = unescape(line.split(':')[0])
	parts = word.split(' ')
	verb = parts[-1]
	if not re.match('^[a-zõäöüšž]+$', verb) or is_illegal(verb):
		# Report the rejected entry on stderr and move on to the next line.
		print("Warning: line " + str(line_number) + ": ignoring: " + word, file=sys.stderr)
		continue
	funname = gfutils.get_funname(word, 'V')
	leading = ' '.join(parts[0:-1])
	print('{0}\t{1} = mkV "{2}" (mkV {3}) ;'.format(word, funname, leading, get_forms(verb)))
Esempio n. 4
0
# Command-line arguments, as returned by gfutils (args.forms and args.pos are read below).
args = gfutils.get_args()

# Lemma lookup loaded from args.forms; indexed by lemma further down.
lemma_to_forms = gfutils.get_lemma_to_forms(args.forms)

# The input line can have the Filosoft tag after the '//' sign (which we ignore).
# The word itself can contain spaces to denote compound word segment borders.
line_number = 0
for line in sys.stdin:
    line_number += 1
    line = line.strip()
    # Cut off ' //' and everything after it.
    word = re.sub(' //.*', '', line)
    word = word.strip()
    # Keep the space-separated segments before the spaces are removed from the word.
    parts = word.split(' ')
    word = re.sub(' ', '', word)
    # Only words made of the listed lowercase letters that pass is_illegal() are processed.
    if re.match('^[a-zõäöüšž]+$', word) and not is_illegal(word):
        funname = gfutils.get_funname(word, args.pos)
        # NOTE(review): found_word is not updated in the visible part of this loop;
        # presumably it is set further down -- confirm against the full script.
        found_word = False
        entry = ''
        # Try the joined segments from position i to the end as the lemma,
        # starting with the whole word (i == 0) and dropping leading segments.
        for i in range(0, len(parts)):
            lemma = ''.join(parts[i:])
            if lemma in lemma_to_forms:
                if i == 0:
                    # The whole word is a known lemma: use its stored forms directly.
                    entry = 'mkN %s' % (lemma_to_forms[lemma])
                else:
                    # The word is a compound word
                    prefix = ''.join(parts[:i])
                    # We do not accept single character prefixes.
                    # TODO: These might make sense, depending on how they are coded (ekiri vs e-kiri)
                    # TODO: What about 'apriori'?
                    if len(prefix) == 1:
                        continue
Esempio n. 5
0
def format_gf_lexicon_entry(word, rektion):
    """Print the GF lexicon line for ``word`` to standard output.

    The emitted line has the shape
    ``<word>TAB<funname> = mk<verbtype> <lin> ;`` where the verb type and
    the linearization come from ``get_lin(word, rektion)`` and the function
    name from ``gfutils.get_funname(word, verbtype)``.

    word    -- the lexicon word; also printed as the first column
    rektion -- handed unchanged to ``get_lin``

    Returns nothing; the only effect is the printed line.
    """
    verbtype, lin = get_lin(word, rektion)
    funname = gfutils.get_funname(word, verbtype)
    # A parenthesised single-argument print is valid both as the Python 2
    # statement and as the Python 3 function, and prints the same text.
    print("%s\t%s = mk%s %s ;" % (word, funname, verbtype, lin))
Esempio n. 6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# @author Kaarel Kaljurand
# @version 2013-10-01

import sys
import re
import gfutils

# Turn every line of standard input into a GF adverb lexicon line of the form
# <word>TAB<funname> = mkAdv "<word>" ;
for line in sys.stdin:
	line = line.strip()
	# Cut off ' //' and everything after it; what remains is the word.
	word = re.sub(' //.*', '', line)
	word = word.strip()
	funname = gfutils.get_funname(word, 'Adv')
	# A parenthesised single-argument print is valid both as the Python 2
	# statement and as the Python 3 function, and prints the same text.
	print('%s\t%s = mkAdv "%s" ;' % (word, funname, word))