Example #1
0
 def do_gloss(self, elems, sens, xlit=False, xlang=None):
     """Turn gloss elements into jdb.Gloss records attached to 'sens'.

     elems -- Iterable of element objects exposing .get() and .text
       (ElementTree-style).  The xml:lang attribute selects the gloss
       language; 'eng' is used when the attribute is absent.
     sens -- Object receiving the records.  Its '_gloss' list is created
       if missing and extended in place; nothing is touched when no
       records were produced.
     xlit -- When true, text containing 'lit:' is handed to
       extract_lit(), which returns the remaining text and a list of
       literal-translation strings; each of those becomes a separate
       'lit' gloss.
     xlang -- Optional container of language ids.  When given, regular
       ('equ') glosses in other languages are dropped.

     Problems (unknown language, non-latin text, duplicate lang/text
     pair) are reported through self.warn(); nothing is returned.
     """
     XKW = self.XKW
     glosses = []
     lits = []
     dupchk = {}
     for elem in elems:
         lng = elem.get('{http://www.w3.org/XML/1998/namespace}lang')
         try:
             lang = XKW.LANG[lng].id if lng else XKW.LANG['eng'].id
         except KeyError:
             self.warn("Invalid gloss lang attribute: '%s'" % lng)
             continue
         txt = elem.text
         # Only a warning: the gloss is still processed below.
         if not jdb.jstr_gloss(txt):
             self.warn("gloss text '%s' not latin characters." % txt)
         lit = []
         # elem.text is None for an empty element; guard so the substring
         # test cannot raise TypeError (empty text is skipped further down).
         if xlit and txt and ('lit:' in txt):
             txt, lit = extract_lit(txt)
         if not jdb.unique((lang, txt), dupchk):
             self.warn("Duplicate lang/text in gloss '%s'/'%s'" %
                       (lng, txt))
             continue
         # Text may be empty here, e.g. when everything was a literal part.
         if txt and (not xlang or lang in xlang):
             glosses.append(
                 jdb.Gloss(lang=lang, ginf=XKW.GINF['equ'].id, txt=txt))
         # NOTE(review): literal glosses are not filtered by 'xlang',
         # unlike the regular glosses above -- confirm this is intended.
         if lit:
             lits.extend([
                 jdb.Gloss(lang=lang, ginf=XKW.GINF['lit'].id, txt=x)
                 for x in lit
             ])
     if glosses or lits:
         if not hasattr(sens, '_gloss'):
             sens._gloss = []
         # Regular glosses first, then all literal glosses.
         sens._gloss.extend(glosses + lits)
Example #2
0
def bld_sens(sens, glosses):
    """Fill in a sense record from a list of parsed gloss items.

    'glosses' is a sequence of 2-item entries: the gloss text and a
    list of sense tags (possibly empty).  Each tag is an n-tuple whose
    first item is either a string naming the tag type ('KINF', 'POS',
    'lsrc', etc) or None when the type was not given explicitly (for
    example the input contained a bare keyword like "vi" rather than
    "pos=vi"); the remaining items depend on the tag type.

    Every gloss text is cleaned with jellex.gcleanup(), wrapped in a
    jdb.Gloss and appended to sens._gloss (which is reset to an empty
    list first).  The tags are handed to sens_tags(), which distributes
    them onto the sense and gloss.  A gloss still lacking 'ginf' or
    'lang' afterwards gets the 'equ' and 'eng' defaults.

    Returns the error messages reported by sens_tags() joined with
    newlines (an empty string when there were none).
    """
    KW = jdb.KW
    problems = []
    sens._gloss = []
    for raw_text, tag_items in glosses:
        record = jdb.Gloss(txt=jellex.gcleanup(raw_text))
        # Attach the gloss before processing its tags.
        sens._gloss.append(record)
        if tag_items:
            problems += sens_tags(sens, record, tag_items)
        # Apply defaults only where the tags left the field unset.
        if record.ginf is None:
            record.ginf = KW.GINF['equ'].id
        if record.lang is None:
            record.lang = KW.LANG['eng'].id
    return "\n".join(problems)
Example #3
0
def doedit(entr, hist, cmd):
    """Apply the change described by 'cmd' to 'entr' and/or 'hist'.

    entr -- A jdb.Entr() instance to be edited.
    hist -- A jdb.Hist instance; modified only when the edit sets a
      comment or refs.
    cmd -- A Cmd instance describing the change.  Read attributes:
      'operand' (what to change), 'cmd' (the action, consulted only
      for operand 'entr'), 'sens' (1-based sense number for
      sense-level operands), 'old' and 'new'.

    Raises ValueError(cmd.operand) for an unrecognised operand.

    Should return True if 'entr' or 'hist' were actually changed and
    False otherwise, but currently always returns True.
    """
    what = cmd.operand

    if what == 'comment':
        hist.notes = cmd.new
    elif what == 'refs':
        hist.refs = cmd.new
    elif what == 'entr':
        # Deleting is the only entry-level action handled here.
        if cmd.cmd == 'del':
            entr.stat = jdb.KW.STAT['D'].id
    elif what in ('kanj', 'rdng'):
        targets = getattr(entr, '_' + what)
        built = None
        if cmd.new:
            maker = jdb.Kanj if what == 'kanj' else jdb.Rdng
            built = maker(txt=cmd.new)
        # Without new text the raw (empty) cmd.new value is passed on.
        edit(targets, 'txt', cmd.old, built or cmd.new,
             what, cmd.old, cmd.new)
    elif what == 'gloss':
        targets = getattr(entr._sens[cmd.sens - 1], '_' + what)
        built = None
        if cmd.new:
            built = jdb.Gloss(txt=cmd.new,
                              lang=jdb.KW.LANG['eng'].id,
                              ginf=jdb.KW.GINF['equ'].id)
        edit(targets, 'txt', cmd.old, built or cmd.new,
             what, cmd.old, cmd.new)
    elif what in ('pos', 'misc', 'fld', 'dial'):
        targets = getattr(entr._sens[cmd.sens - 1], '_' + what)
        # Keyword operands are matched by id rather than by text.
        new_id, old_id = kw2id(what, cmd.new, cmd.old)
        edit(targets, 'kw', old_id, new_id, what, cmd.old, cmd.new)
    else:
        raise ValueError(what)

    return True  # FIXME: how to determine if no change was made to entry?
Example #4
0
def mkentr(jtxt, etxt):
    """Create an entry object for the "A" line of an example sentence.

    jtxt -- Text of the sentence's Japanese side.  It is stored as a
      reading when jdb.jstr_reb() accepts it, otherwise as a kanji item.
    etxt -- Translation text; becomes the single gloss of the entry's
      only sense.

    The module-level 'Lnnum' value is recorded, as a string, in the
    entry's 'srcnote'.  Returns the new jdb.Entr.
    """
    global Lnnum
    entry = jdb.Entr(stat=KW.STAT_A, unap=False)
    entry.srcnote = str(Lnnum)

    if jdb.jstr_reb(jtxt):
        entry._rdng = [jdb.Rdng(txt=jtxt)]
    else:
        entry._kanj = [jdb.Kanj(txt=jtxt)]

    translation = jdb.Gloss(txt=etxt, ginf=KW.GINF_equ, lang=KW.LANG_eng)
    entry._sens = [jdb.Sens(_gloss=[translation])]
    return entry
Example #5
0
def mkentr(jtxt, etxt, kwds):
    """Create an entry object for the "A" line of an example sentence.

    jtxt -- Text of the sentence's Japanese side.  It is stored as a
      reading when jdb.jstr_reb() accepts it, otherwise as a kanji item.
    etxt -- Translation text; becomes the single gloss of the entry's
      only sense.
    kwds -- Sequence of items, each holding a kw id number first and,
      optionally, a note string second.  The ids become the sense's
      Misc records; the notes are joined with "; " into the sense note
      (None when there are no notes).

    The module-level 'Lnnum' value is recorded, as a string, in the
    entry's 'srcnote'.  Returns the new jdb.Entr.
    """
    global Lnnum
    entry = jdb.Entr(stat=KW.STAT_A, unap=False)
    entry.srcnote = str(Lnnum)

    kw_ids = [item[0] for item in kwds]
    note_parts = [item[1] for item in kwds if len(item) > 1]
    # An empty join result is stored as None rather than ''.
    sens_note = "; ".join(note_parts) or None

    if jdb.jstr_reb(jtxt):
        entry._rdng = [jdb.Rdng(txt=jtxt)]
    else:
        entry._kanj = [jdb.Kanj(txt=jtxt)]

    translation = jdb.Gloss(lang=KW.LANG_eng, ginf=KW.GINF_equ, txt=etxt)
    misc_tags = [jdb.Misc(kw=kw_id) for kw_id in kw_ids]
    entry._sens = [
        jdb.Sens(notes=sens_note, _gloss=[translation], _misc=misc_tags)
    ]
    return entry
Example #6
0
def rmgroup(rmg, langs=None):
    """Convert a reading/meaning group element into jdb records.

    rmg -- Element whose 'reading' and 'meaning' children are processed
      (anything offering an ElementTree-style findall()).
    langs -- Optional container of language ids.  When given, meanings
      in other languages are left out of the result; they still count
      for duplicate detection.

    Returns a 3-tuple (rdngs, glosses, cinf):
      rdngs -- jdb.Rdng records for 'ja_on' / 'ja_kun' readings, each
        carrying Rinf records for the reading type and, when present,
        the r_status value.
      glosses -- jdb.Gloss records, one per kept meaning.
      cinf -- jdb.Cinf records for 'pinyin', 'korean_r', 'korean_h'
        and 'vietnam' readings.

    Duplicates are skipped with a warn() message.  Raises KeyError for
    a reading whose type is missing or not one of those listed above;
    keyword table lookups can raise KeyError as well.
    """
    rdngs = []
    glosses = []
    cinf = []

    seen = set()
    for x in rmg.findall('reading'):
        rtype = None
        rstat = None
        # NOTE(review): 'on_type' overwrites 'r_type' when it comes later
        # in the attribute list -- confirm this is intended.
        for aname, aval in x.items():
            if aname == 'r_type': rtype = aval
            if aname == 'on_type': rtype = aval
            if aname == 'r_status': rstat = aval
        if rtype in ('pinyin', 'korean_r', 'korean_h', 'vietnam'):
            if (rtype, x.text) in seen:
                warn("Duplicate reading ignored: %s, %s" % (rtype, x.text))
                continue
            seen.add((rtype, x.text))
            cinf.append(jdb.Cinf(kw=KW.CINF[rtype].id, value=x.text))
        elif rtype == 'ja_on' or rtype == 'ja_kun':
            # Japanese readings are de-duplicated on text alone.
            if x.text in seen:
                warn('Duplicate reading ignored: %s' % x.text)
                continue
            seen.add(x.text)
            rdng = jdb.Rdng(txt=x.text, _inf=[])
            # Tag with the reading type itself.  The attribute-scan loop
            # variable must not be used here: it holds whichever attribute
            # happened to be last (e.g. the r_status value).
            rdng._inf.append(
                jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rtype, rtype)].id))
            if rstat:
                rdng._inf.append(
                    jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rstat, rstat)].id))
            rdngs.append(rdng)
        else:
            raise KeyError('Unkown r_type attribute: %s' % rtype)

    seen = set()
    for x in rmg.findall('meaning'):
        lang = x.get('m_lang', 'en')
        langkw = KW.LANG[Xml2db.LANG.get(lang, lang)].id
        if (lang, x.text) in seen:
            warn("Duplicate lang,meaning pair ignored: %s:%s" % (lang, x.text))
            continue
        seen.add((lang, x.text))
        if not langs or langkw in langs:
            # NOTE(review): ginf=1 is a hard-coded keyword id, presumably
            # the 'equ' gloss type -- confirm against KW.GINF.
            glosses.append(jdb.Gloss(txt=x.text, lang=langkw, ginf=1))
    return rdngs, glosses, cinf