Ejemplo n.º 1
0
 def do_rdngs(self, elems, entr, fmap):
     """Create a Rdng object for each reading element and attach them to 'entr'.

     elems -- Iterable of reading elements, or None (in which case nothing
         is done).  Each element must have a 'reb' child; 're_inf',
         're_pri', 're_nokanji', 're_restr' and 'audio' children are
         processed when present.
     entr -- Entry object.  Its '_kanj' list (empty list if absent) is
         handed to do_restr(); its '_rdng' attribute is assigned the list
         of new readings, but only when at least one reading was created.
     fmap -- Mapping indexed by the freq tuple returned from parse_freq().
         Item [0] of each value is the list of readings carrying that
         freq tag; it is updated in place.

     Returns None.
     """
     if elems is None: return
     kanjs = getattr(entr, '_kanj', [])
     rdngs = []
     dupchk = {}
     # Reading numbers come from the element's position (1-based), so an
     # element skipped as a duplicate leaves a gap in the numbering.
     for num, elem in enumerate(elems, start=1):
         txt = elem.find('reb').text
         if not jdb.unique(txt, dupchk):
             self.warn("Duplicate reb text: '%s'" % txt)
             continue
         # A non-kana reading is only warned about; it is still kept.
         if not jdb.jstr_reb(txt):
             self.warn("reb text '%s' not kana." % txt)
         rdng = jdb.Rdng(rdng=num, txt=txt)
         self.do_kws(elem.findall('re_inf'), rdng, '_inf', 'RINF')
         for x in elem.findall('re_pri'):
             freqtuple = self.parse_freq(x.text, "re_pri")
             if not freqtuple: continue
             rlist = fmap[freqtuple][0]
             # Record this reading under the freq tag once; a repeated tag
             # on the same reading is reported rather than added again.
             if not jdb.isin(rdng, rlist): rlist.append(rdng)
             else: self.freq_warn("Duplicate", rdng, None, x.text)
         nokanji = elem.find('re_nokanji')
         self.do_restr(elem.findall('re_restr'), rdng, kanjs, 'restr',
                       nokanji)
         self.do_audio(elem.findall("audio"), rdng, jdb.Rdngsnd)
         rdngs.append(rdng)
     if rdngs: entr._rdng = rdngs
Ejemplo n.º 2
0
def merge_freqs (entr):
        """Merge the per-reading and per-kanji freq tags of 'entr'.

        Intended for code that constructs Entr objects by parsing a
        textual entry description, where freq (a.k.a. prio) tags are
        parsed for readings and kanji independently.  Each Rdng and Kanj
        object in entr._rdng / entr._kanj may carry a temporary "_FREQ"
        attribute: a list of 2-tuples of (freq table kw id, freq value).

        This function groups the readings and kanji by those tuples,
        deletes every "_FREQ" attribute it finds, and hands the grouping
        to jdb.make_freq_objs(), which (per the original author's notes)
        creates the properly linked Freq objects for the reading/kanji
        combinations.

        Returns whatever jdb.make_freq_objs() returns (an error value or
        collection, judging by the original variable name 'errs').
        """
          # Maps each (kw, value) tuple to a two-item list:
          # item [0] holds the readings tagged with it, item [1] the kanji.
        fmap = defaultdict (lambda: [[], []])

          # Readings are collected first (slot 0), then kanji (slot 1).
        for slot, attr in enumerate (('_rdng', '_kanj')):
            for item in getattr (entr, attr, []):
                for kw_val in getattr (item, '_FREQ', []):
                    members = fmap[kw_val][slot]
                      # jdb.isin() is used instead of the "in" operator;
                      # the original notes say it compares with "is"
                      # rather than "==".
                    if not jdb.isin (item, members):
                        members.append (item)
                  # The temporary attribute is no longer needed once its
                  # contents have been recorded in 'fmap'.
                if hasattr (item, '_FREQ'):
                    del item._FREQ

        return jdb.make_freq_objs (fmap, entr)
Ejemplo n.º 3
0
 def do_kanjs (self, elems, entr, fmap):
     """Create a Kanj object for each kanji element and attach them to 'entr'.

     elems -- Iterable of kanji elements, or None (in which case nothing
         is done).  Each element must have a 'keb' child; 'ke_inf' and
         'ke_pri' children are processed when present.
     entr -- Entry object whose '_kanj' attribute is assigned the list
         of new Kanj objects, but only when at least one was created.
     fmap -- Mapping indexed by the freq tuple returned from parse_freq().
         Item [1] of each value is the list of kanji carrying that freq
         tag; it is updated in place.

     Returns None.
     """
     if elems is None:
         return
     seen = {}
     collected = []
     # Kanji numbers are position + 1, so an element skipped as a
     # duplicate leaves a gap in the numbering.
     for pos, elem in enumerate (elems):
         keb = elem.find ('keb').text
         if not jdb.unique (keb, seen):
             self.warn ("Duplicate keb text: '%s'" % keb)
             continue
         # Text that fails the kanji check is only warned about; it is
         # still kept.
         if not jdb.jstr_keb (keb):
             self.warn ("keb text '%s' not kanji." % keb)
         kanj = jdb.Kanj (kanj=pos+1, txt=keb)
         self.do_kws (elem.findall ('ke_inf'), kanj, '_inf', 'KINF')
         for pri in elem.findall ('ke_pri'):
             freqtuple = self.parse_freq (pri.text, "ke_pri")
             if freqtuple:
                 klist = fmap[freqtuple][1]
                 # A freq tag repeated on the same kanji is reported
                 # rather than recorded twice.
                 if jdb.isin (kanj, klist):
                     self.freq_warn ("Duplicate", None, kanj, pri.text)
                 else:
                     klist.append (kanj)
         collected.append (kanj)
     if collected:
         entr._kanj = collected