def do_rdngs(self, elems, entr, fmap):
    """Create a Rdng object for each reading element and store them on `entr`.

    Args:
        elems: Iterable of elements, each having a 'reb' child whose text
            is the reading.  If None, nothing is done.
        entr: Entry object.  Its `_kanj` list (empty list if absent) is
            passed to do_restr().  When at least one reading is created,
            `entr._rdng` is set to the new list; any previous `_rdng`
            value is replaced, not extended.
        fmap: Mapping indexed by a parsed freq tuple whose values are
            indexable pairs of lists.  Each reading that carries a given
            freq is appended to the first list of that freq's pair.

    A repeated 'reb' text produces a warning and the element is skipped.
    A text rejected by jdb.jstr_reb() produces a "not kana" warning but
    the reading is still kept.
    """
    if elems is None:
        return
    kanjs = getattr(entr, '_kanj', [])
    rdngs = []
    dupchk = {}
    for idx, elem in enumerate(elems):
        txt = elem.find('reb').text
        if not jdb.unique(txt, dupchk):
            self.warn("Duplicate reb text: '%s'" % txt)
            continue
        if not jdb.jstr_reb(txt):
            self.warn("reb text '%s' not kana." % txt)
        # The reading number is the element's 1-based position in 'elems',
        # so skipped duplicates leave gaps in the numbering.
        rdng = jdb.Rdng(rdng=idx + 1, txt=txt)
        self.do_kws(elem.findall('re_inf'), rdng, '_inf', 'RINF')
        for pri in elem.findall('re_pri'):
            freqtuple = self.parse_freq(pri.text, "re_pri")
            if not freqtuple:
                continue
            # Slot 0 of each fmap value holds the readings for that freq.
            rlist = fmap[freqtuple][0]
            if jdb.isin(rdng, rlist):
                self.freq_warn("Duplicate", rdng, None, pri.text)
            else:
                rlist.append(rdng)
        nokanji = elem.find('re_nokanji')
        self.do_restr(elem.findall('re_restr'), rdng, kanjs, 'restr', nokanji)
        self.do_audio(elem.findall("audio"), rdng, jdb.Rdngsnd)
        rdngs.append(rdng)
    if rdngs:
        entr._rdng = rdngs
def merge_freqs(entr):
    """Merge per-reading and per-kanji freq tags of `entr`.

    This function is used by code that constructs Entr objects by
    parsing a textual entry description.  Such code generally parses
    freq (a.k.a. prio) tags for readings and kanji individually, and
    before the entry is used those independent tags must be combined so
    that a reading and a kanji carrying the same freq tag can be tied
    to a single Freq object.

    The entry's Rdng and Kanj objects (`entr._rdng`, `entr._kanj`) may
    each have a temporary `_FREQ` attribute holding a list of 2-tuples:
    the freq table keyword id number and the freq value.  Those tuples
    are gathered into a map keyed by the tuple, each `_FREQ` attribute
    is deleted, and the map is handed to jdb.make_freq_objs(), which is
    expected to create the properly linked Freq objects.

    Returns:
        Whatever jdb.make_freq_objs(fmap, entr) returns.
    """
    # Every map value is a two-item list: [readings, kanji] that the
    # (kw, value) freq key applies to.
    fmap = defaultdict(lambda: [[], []])

    # Readings are collected first (slot 0), then kanji (slot 1).
    for attr_name, slot in (('_rdng', 0), ('_kanj', 1)):
        for item in getattr(entr, attr_name, []):
            for kw_val in getattr(item, '_FREQ', []):
                members = fmap[kw_val][slot]
                # jdb.isin() is used rather than "in" to decide whether
                # the item has already been recorded for this freq.
                if not jdb.isin(item, members):
                    members.append(item)
            # The temporary attribute is no longer needed once collected.
            if hasattr(item, '_FREQ'):
                del item._FREQ

    # 'fmap' now has one entry per unique freq (kw, value) tuple; the
    # reading/kanji combinations are turned into Freq objects by jdb.
    return jdb.make_freq_objs(fmap, entr)
def do_kanjs(self, elems, entr, fmap):
    """Create a Kanj object for each kanji element and store them on `entr`.

    Args:
        elems: Iterable of elements, each having a 'keb' child whose text
            is the kanji string.  If None, nothing is done.
        entr: Entry object; `entr._kanj` is set to the list of created
            Kanj objects when that list is non-empty.
        fmap: Mapping indexed by a parsed freq tuple whose values are
            indexable pairs of lists.  Each kanji that carries a given
            freq is appended to the second list of that freq's pair.

    A repeated 'keb' text produces a warning and the element is skipped.
    A text rejected by jdb.jstr_keb() produces a "not kanji" warning but
    the kanji is still kept.
    """
    if elems is None:
        return

    seen_texts = {}
    collected = []
    # Kanji numbers are 1-based positions in 'elems'; skipped duplicates
    # therefore leave gaps in the numbering.
    for position, node in enumerate(elems, start=1):
        text = node.find('keb').text
        if not jdb.unique(text, seen_texts):
            self.warn("Duplicate keb text: '%s'" % text)
            continue
        if not jdb.jstr_keb(text):
            self.warn("keb text '%s' not kanji." % text)

        kanj = jdb.Kanj(kanj=position, txt=text)
        self.do_kws(node.findall('ke_inf'), kanj, '_inf', 'KINF')

        for pri in node.findall('ke_pri'):
            freqtuple = self.parse_freq(pri.text, "ke_pri")
            if not freqtuple:
                continue
            # Slot 1 of each fmap value holds the kanji for that freq.
            kanj_list = fmap[freqtuple][1]
            if jdb.isin(kanj, kanj_list):
                self.freq_warn("Duplicate", None, kanj, pri.text)
            else:
                kanj_list.append(kanj)

        collected.append(kanj)

    if collected:
        entr._kanj = collected