Esempio n. 1
0
 def getff_tm(self, fn, enc='utf-8', dlm='\t', mdlm=None):
     """Load a two-column mapping file into ``self.tm``.

     Every line yielded by ``utils.get_lines`` is split on ``dlm`` into
     exactly two fields. The pair is recorded as ``self.tm[t1][t2] = 1``,
     so ``self.tm`` maps each first field to a dict keyed by the second
     fields seen with it.

     Args:
         fn: File argument passed through to ``utils.get_lines``.
         enc: Encoding passed through to ``utils.get_lines``.
         dlm: Delimiter separating the two fields of a line.
         mdlm: Accepted so existing callers keep working; this method
             does not use it.

     Raises:
         ValueError: If a line does not split into exactly two fields.
     """
     for line in utils.get_lines(fn, enc, strip=1):
         # morpheme mapping
         t1, t2 = line.split(dlm)
         self.tm.setdefault(t1, {})[t2] = 1
Esempio n. 2
0
 def getff_pc(self, fn, enc=None, pdlm=None, ph=None):
     """Load rows of a delimited file with a header line into ``self.pc``.

     The first line that starts with the prefix ``ph`` is treated as the
     header: the prefix is removed and the remainder is split on ``pdlm``
     into column names. Any later line starting with ``ph`` is skipped,
     as is every line that appears before the header. For each remaining
     line, the fields from the ``[lyses]`` column to the end of the line
     are stored in ``self.pc`` under the line's first field.

     Args:
         fn: File argument passed through to ``utils.get_lines``.
         enc: Encoding; falls back to ``self.de`` when falsy.
         pdlm: Column delimiter; falls back to ``self.pc_dlm`` when falsy.
         ph: Header-line prefix; falls back to ``self.ph`` when falsy.

     Raises:
         KeyError: If a data row is reached and the header has no
             ``[lyses]`` column.
     """
     enc = enc or self.de
     pdlm = pdlm or self.pc_dlm
     ph = ph or self.ph
     cats = {}
     for line in utils.get_lines(fn, enc, strip=1):
         if line.startswith(ph):
             if not cats:
                 # Use the same delimiter as for the data rows (so an
                 # explicit ``pdlm`` argument is honoured) and drop the
                 # whole prefix, whatever its length.
                 names = line[len(ph):].split(pdlm)
                 cats = {name: idx for idx, name in enumerate(names)}
             continue
         if not cats:
             # Nothing can be interpreted until the header has been seen.
             continue
         vals = line.split(pdlm)
         self.pc[vals[0]] = vals[cats['[lyses]']:]
Esempio n. 3
0
 def getff_lkp(self, fn, enc=None, dlm=None):
     """Load a three-column lookup file into ``self.lkp``.

     Every line yielded by ``utils.get_lines`` is split on ``dlm`` into
     exactly three fields. The second field is appended to the list
     stored in ``self.lkp`` under the first field. The third field must
     be present but is not stored.

     Args:
         fn: File argument passed through to ``utils.get_lines``.
         enc: Encoding; falls back to ``self.de`` when falsy.
         dlm: Field delimiter; falls back to ``self.gen_dlm`` when falsy.

     Raises:
         ValueError: If a line does not split into exactly three fields.
     """
     enc = enc or self.de
     dlm = dlm or self.gen_dlm
     for line in utils.get_lines(fn, enc, strip=1):
         sf, tg, _cnt = line.split(dlm)
         # Append in place instead of rebuilding the list for every line.
         self.lkp.setdefault(sf, []).append(tg)
Esempio n. 4
0
 def getff_unts(self, fn, enc='utf-8'):
     """Read ``fn`` via ``utils.get_lines`` and keep the result on ``self.unts``.

     Args:
         fn: File argument passed through to ``utils.get_lines``.
         enc: Encoding passed through to ``utils.get_lines``.
     """
     lines = utils.get_lines(fn, enc, strip=1)
     self.unts = lines
Esempio n. 5
0
 def getff_sfx(self, fn, enc='utf-8', dlm='\t', mdlm=None):
     """Load a two-column file into ``self.sfx``.

     Every line yielded by ``utils.get_lines`` is split on ``dlm`` into
     exactly two fields, and the pair is recorded as
     ``self.sfx[first][second] = 1``.

     Args:
         fn: File argument passed through to ``utils.get_lines``.
         enc: Encoding passed through to ``utils.get_lines``.
         dlm: Delimiter separating the two fields of a line.
         mdlm: Not used by this method.
     """
     for row in utils.get_lines(fn, enc, strip=1):
         key, suffix = row.split(dlm)
         self.sfx.setdefault(key, {})[suffix] = 1