def counts(wordlist):
    """Yield lemmatization statistics for every entry of *wordlist*.

    Each entry is iterated as a collection of candidate forms.  A form is
    lemmatized against ``wl`` when ``detone(form)`` differs from the form
    itself, and against ``wl_detone`` otherwise.  ``wl``, ``wl_detone``,
    ``detone``, ``lemmatize`` and ``dict_disambiguate`` are module-level
    names that are not defined inside this function.

    Yields:
        One ``(stage, length, word)`` tuple per entry, where ``stage`` is
        the largest stage value ``lemmatize`` returned for the entry's
        forms (``-2`` when no form produced a larger one), ``length`` is
        ``len(dict_disambiguate(...))`` over all glosses collected for the
        entry, and ``word`` is the entry itself, unchanged.
    """
    for word in wordlist:
        # -2 is the starting floor for the maximum; presumably lower than
        # any stage lemmatize() can return -- TODO confirm.
        top_stage = -2
        glosses = []
        for form in word:
            # Only the selected dictionary name is looked up, exactly as in
            # an if/else with one lemmatize() call per branch.
            dictionary = wl if form != detone(form) else wl_detone
            stage, gl = lemmatize(form, dictionary)
            if top_stage < stage:
                top_stage = stage
            glosses.extend(gl)
        yield (top_stage, len(dict_disambiguate(glosses)), word)
from orthograph import convertw, detone from morphology import lemmatize, dict_disambiguate, print_gloss import re ci = ConcordanceIndex(test.words(), key=lambda s: s.lower()) types = list(set([s.lower() for s in set(test.words())])) types.sort() for word in types: if not re.search(r'[0-9.,;:!?]', word): ci.print_concordance(word, lines=15) print nw = convertw(word) nwl = [w for w in nw if w in wl] if nwl: formlist = nwl else: formlist = nw result = [] for form in formlist: if form != detone(form): stage, gl = lemmatize(form, wl) else: stage, gl = lemmatize(form, wl_detone) result.extend(gl) glstr = [print_gloss(g) for g in dict_disambiguate(result)] for gs in glstr: print " ", gs.encode('utf-8') print
import re ci = ConcordanceIndex(test.words(), key=lambda s:s.lower()) types = list(set([s.lower() for s in set(test.words())])) types.sort() for word in types: if not re.search(r'[0-9.,;:!?]', word): ci.print_concordance(word, lines=15) print nw = convertw(word) nwl = [w for w in nw if w in wl] if nwl: formlist = nwl else: formlist = nw result = [] for form in formlist: if form != detone(form): stage, gl = lemmatize(form, wl) else: stage, gl = lemmatize(form,wl_detone) result.extend(gl) glstr = [print_gloss(g) for g in dict_disambiguate(result)] for gs in glstr: print " ", gs.encode('utf-8') print
# Python 3 script: split every non-empty string of `sent` into words and,
# for each word, print its candidate forms, the sentence with that word
# upper-cased, and the glosses of every candidate form.
# sent, wl, convertw, lemmatize, dict_disambiguate and print_gloss are not
# defined in this snippet and must already be in scope.
ww = [re.split(r'[- .,;!?"]', s) for s in sent if s]
# Drop the empty strings re.split() leaves between adjacent separators.
words = [[w for w in wlist if w] for wlist in ww]

for sentence in words:
    print(' '.join(sentence).strip('\n\t'))
    for i, word in enumerate(sentence):
        # Prefer the converted forms that are present in wl; when none is,
        # fall back to every converted form.
        nw = convertw(word)
        nwl = [w for w in nw if w in wl]
        formlist = nwl or nw

        # end=" " keeps the forms, the context words and the closing ")"
        # on a single line.  A trailing comma inside print(...) does not
        # suppress the newline in Python 3.
        print('/'.join(formlist), "\t(", end=" ")
        for j, token in enumerate(sentence):
            # The word currently being glossed is shown in upper case.
            shown = token.upper() if j == i else token
            print(shown + " ", end=" ")
        print(")")

        for form in formlist:
            # NOTE(review): other snippets in this file unpack
            # `stage, gl = lemmatize(...)`; here the whole return value is
            # handed to dict_disambiguate() -- confirm which API is meant.
            gl = dict_disambiguate(lemmatize(form, wl))
            glstr = [print_gloss(g) for g in gl]
            for gs in glstr:
                print(" ", gs)
        # Blank line after each word; a bare `print` is a no-op in
        # Python 3, so the function has to be called.
        print()
    # Blank line after each sentence.
    print()
ww = [re.split(r'[- .,;!?"]', s) for s in sent if s] words = [[w for w in wlist if w] for wlist in ww] for sentence in words: i = 0 print ' '.join(sentence).strip('\n\t').encode('utf-8') for word in sentence: nw = convertw(word) nwl = [w for w in nw if w in wl] if nwl: formlist = nwl else: formlist = nw print '/'.join(formlist).encode('utf-8'), "\t(", for j in range(len(sentence)): if j == i: print sentence[j].upper().encode('utf-8') + " ", else: print sentence[j].encode('utf-8') + " ", print ")" for form in formlist: gl = dict_disambiguate(lemmatize(form, wl)) glstr = [print_gloss(g) for g in gl] for gs in glstr: print " ", gs.encode('utf-8') i += 1 print print