Example #1
0
 def do_rdngs(self, elems, entr, fmap):
     """Build reading objects from reading elements and attach them to `entr`.

     Every element in `elems` is expected to have a 'reb' child whose text
     becomes the reading text.  An element whose reb text repeats an
     earlier one is reported via `self.warn` and skipped.  Text rejected
     by `jdb.jstr_reb` is only warned about; the reading is still kept.

     Readings are numbered by the element's 1-based position in `elems`,
     so a skipped duplicate leaves a gap in the numbering.

     Args:
         elems: Iterable of ElementTree-style elements (objects offering
             `find`/`findall`), or None, in which case nothing is done.
         entr: Entry object.  Its `_kanj` list (empty if absent) is handed
             to `do_restr`.  Its `_rdng` attribute is replaced -- not
             extended -- and only when at least one reading was built.
         fmap: Mapping indexed by the tuples returned from
             `self.parse_freq`; item 0 of each value is a list of readings
             that is appended to in place.
     """
     if elems is None:
         return
     kanjs = getattr(entr, '_kanj', [])
     rdngs = []
     dupchk = {}
     for position, elem in enumerate(elems, start=1):
         txt = elem.find('reb').text
         if not jdb.unique(txt, dupchk):
             self.warn("Duplicate reb text: '%s'" % txt)
             continue
         if not jdb.jstr_reb(txt):
             # Warn only; the reading is still recorded below.
             self.warn("reb text '%s' not kana." % txt)
         rdng = jdb.Rdng(rdng=position, txt=txt)
         self.do_kws(elem.findall('re_inf'), rdng, '_inf', 'RINF')
         for pri in elem.findall('re_pri'):
             freqtuple = self.parse_freq(pri.text, "re_pri")
             if not freqtuple:
                 continue
             rlist = fmap[freqtuple][0]
             if jdb.isin(rdng, rlist):
                 self.freq_warn("Duplicate", rdng, None, pri.text)
             else:
                 rlist.append(rdng)
         nokanji = elem.find('re_nokanji')
         self.do_restr(elem.findall('re_restr'), rdng, kanjs, 'restr',
                       nokanji)
         self.do_audio(elem.findall("audio"), rdng, jdb.Rdngsnd)
         rdngs.append(rdng)
     if rdngs:
         entr._rdng = rdngs
Example #2
0
 def do_gloss(self, elems, sens, xlit=False, xlang=None):
     """Convert gloss elements into `jdb.Gloss` objects appended to `sens`.

     For each element the language is taken from its `xml:lang` attribute
     ('eng' when the attribute is absent or empty) and looked up in
     `self.XKW.LANG`.  An unknown language is warned about and the element
     is skipped.  Text rejected by `jdb.jstr_gloss` is only warned about.
     A (language, text) pair already seen in this call is warned about
     and skipped.

     Args:
         elems: Iterable of ElementTree-style gloss elements.
         sens: Sense object.  Its `_gloss` list is created if missing and
             extended with the regular glosses followed by the literal
             glosses.  It is left untouched when nothing was produced.
         xlit: When true, text containing 'lit:' is passed to
             `extract_lit`, which returns the remaining text plus a
             sequence of literal-gloss strings.
         xlang: Optional container of language ids.  When given, regular
             glosses in other languages are dropped.  Literal glosses are
             not subject to this filter.
     """
     XKW = self.XKW
     glosses = []
     lits = []
     dupchk = {}
     for elem in elems:
         lng = elem.get('{http://www.w3.org/XML/1998/namespace}lang')
         try:
             lang = XKW.LANG[lng].id if lng else XKW.LANG['eng'].id
         except KeyError:
             self.warn("Invalid gloss lang attribute: '%s'" % lng)
             continue
         txt = elem.text
         if not jdb.jstr_gloss(txt):
             self.warn("gloss text '%s' not latin characters." % txt)
         lit = []
         # An element without text has `txt` set to None; guard so the
         # substring test cannot raise TypeError.
         if xlit and txt and ('lit:' in txt):
             txt, lit = extract_lit(txt)
         if not jdb.unique((lang, txt), dupchk):
             self.warn("Duplicate lang/text in gloss '%s'/'%s'" %
                       (lng, txt))
             continue
         if txt and (not xlang or lang in xlang):
             glosses.append(
                 jdb.Gloss(lang=lang, ginf=XKW.GINF['equ'].id, txt=txt))
         if lit:
             lits.extend(
                 jdb.Gloss(lang=lang, ginf=XKW.GINF['lit'].id, txt=x)
                 for x in lit)
     if glosses or lits:
         if not hasattr(sens, '_gloss'):
             sens._gloss = []
         sens._gloss.extend(glosses + lits)
Example #3
0
 def do_kanjs(self, elems, entr, fmap):
     """Build kanji objects from kanji elements and attach them to `entr`.

     Every element in `elems` is expected to have a 'keb' child whose text
     becomes the kanji text.  A repeated keb text is reported via
     `self.warn` and the element skipped.  Text rejected by `jdb.jstr_keb`
     is only warned about and the kanji is still kept.  Kanji are numbered
     by the element's 1-based position in `elems`.

     Args:
         elems: Iterable of ElementTree-style elements, or None, in which
             case nothing is done.
         entr: Entry object whose `_kanj` attribute is set to the new
             list, but only when at least one kanji was built.
         fmap: Mapping indexed by the tuples returned from
             `self.parse_freq`; item 1 of each value is a list of kanji
             that is appended to in place.
     """
     if elems is None:
         return
     seen = {}
     collected = []
     for position, elem in enumerate(elems, start=1):
         keb = elem.find('keb').text
         if not jdb.unique(keb, seen):
             self.warn("Duplicate keb text: '%s'" % keb)
             continue
         acceptable = jdb.jstr_keb(keb)
         if not acceptable:
             # Warn only; the kanji is still recorded below.
             self.warn("keb text '%s' not kanji." % keb)
         kanj = jdb.Kanj(kanj=position, txt=keb)
         self.do_kws(elem.findall('ke_inf'), kanj, '_inf', 'KINF')
         for pri in elem.findall('ke_pri'):
             freqtuple = self.parse_freq(pri.text, "ke_pri")
             if not freqtuple:
                 continue
             tagged = fmap[freqtuple][1]
             if jdb.isin(kanj, tagged):
                 self.freq_warn("Duplicate", None, kanj, pri.text)
             else:
                 tagged.append(kanj)
         collected.append(kanj)
     if collected:
         entr._kanj = collected