Beispiel #1
0
        if len(morphlist) > 2:
            result.append(tuple(morphlist[:-1]))
            newlist = sfec(result, morphlist)
        else:
            newlist.extend([(m, ) for m in morphlist])
    elif len(result[-1]) > 2:
        result.append(result[-1][:-1])
        newlist = sfec(result, morphlist)
    else:
        newlist = result
        newlist.extend([(m, ) for m in morphlist])
    return newlist


# Two parsed views of the 'bailleul.txt' lexicon.  What bailleul.entries() and
# bailleul.xml() return is defined in the bailleul module (not visible here).
elist = bailleul.entries('bailleul.txt')
etree = e.ElementTree(bailleul.xml('bailleul.txt'))

# Maps a tuple of lowercased pieces (a form split on '.' and '-') to the list
# of (lemma, ps, ge) triples of every record form that splits that way.
morphdict = {}

for entry in etree.findall('record'):
    lx = entry.findtext('lx', default='')
    ge = entry.findtext('ge', default='')
    # No default here: ps is None when the record has no <ps> child.
    ps = entry.findtext('ps')
    # Index the headword plus every <va> variant of the record.  An empty
    # <va/> element has text None, which would raise AttributeError on
    # .strip() below, so such variants are skipped.
    lems = [lx]
    lems.extend(va.text for va in entry.findall('va') if va.text is not None)
    for v in lems:
        # Leading/trailing hyphens are removed before the form is stored
        # and before it is split into the key.
        lemma = v.strip(u'-')
        key = tuple(re.split(r'[.-]', lemma.lower()))
        morphdict.setdefault(key, []).append((lemma, ps, ge))
Beispiel #2
0
#!/usr/bin/python
from bamana import bailleul
from xml.etree.ElementTree import ElementTree

lexicon = ElementTree(bailleul.xml('bailleul.txt'))

tagset = [u'adj', u'adv', u'conj', u'cop', u'dtm', u'intj', u'n', u'nr', u'num', u'onomat', u'pers', u'pm', u'pm', u'pp', u'prep', u'prn', u'prt', u'ptcp', u'v', u'vq', u'mrph', u'expr']

for entry in lexicon.findall('record'):
    ps = [p.text for p in entry.findall('ps')]
    lx = entry.find('lx').text
    if len(ps) < 1:
        print u'{0:18} {1:17} '.format(lx, u"PS TAG MISSING!").encode('utf-8')
    elif len(ps) > 1:
        print u'{0:18} {1:17} '.format(lx, u"MULTIPLE PS TAGS:").encode('utf-8'),
        for t in ps:
            print t.encode('utf-8'),
        print
    elif len([f for f in ps[0].split(u'/') if f not in tagset]) > 0:
        print u'{0:18} {1:17} '.format(lx, u"NOT IN TAGSET:").encode('utf-8'),
        print ps[0].encode('utf-8')

Beispiel #3
0
    if not result:
        if len(morphlist) > 2:
            result.append(tuple(morphlist[:-1]))
            newlist = sfec(result, morphlist)
        else:
            newlist.extend([(m,) for m in morphlist])
    elif len(result[-1]) > 2:
        result.append(result[-1][:-1])
        newlist = sfec(result, morphlist)
    else:
        newlist = result
        newlist.extend([(m,) for m in morphlist])
    return newlist

# Two parsed views of the 'bailleul.txt' lexicon.  What bailleul.entries() and
# bailleul.xml() return is defined in the bailleul module (not visible here).
elist = bailleul.entries('bailleul.txt')
etree = e.ElementTree(bailleul.xml('bailleul.txt'))

# Maps a tuple of lowercased pieces (a form split on '.' and '-') to the list
# of (lemma, ps, ge) triples of every record form that splits that way.
morphdict = {}

for entry in etree.findall('record'):
    lx = entry.findtext('lx', default='')
    ge = entry.findtext('ge', default='')
    # No default here: ps is None when the record has no <ps> child.
    ps = entry.findtext('ps')
    # Index the headword plus every <va> variant of the record.  An empty
    # <va/> element has text None, which would raise AttributeError on
    # .strip() below, so such variants are skipped.
    lems = [lx]
    lems.extend(va.text for va in entry.findall('va') if va.text is not None)
    for v in lems:
        # Leading/trailing hyphens are removed before the form is stored
        # and before it is split into the key.
        lemma = v.strip(u'-')
        lowered = lemma.lower()
        info = (lemma, ps, ge)
        morphdict.setdefault(tuple(re.split(r'[.-]', lowered)), []).append(info)
        # When detone() changes the raw form, register the same triple under
        # the key built from the detoned spelling as well.
        # NOTE(review): detone() is defined elsewhere; presumably it removes
        # tone marks -- confirm against its definition.
        if v != detone(v):
            detoned_key = tuple(re.split(r'[.-]', detone(lowered)))
            morphdict.setdefault(detoned_key, []).append(info)