def do_gloss(self, elems, sens, xlit=False, xlang=None):
    """Turn gloss XML elements into Gloss records attached to `sens`.

    Args:
        elems: iterable of gloss elements; each must offer `.get()` for
            attribute access and a `.text` attribute.
        sens: sense object that receives the new records.  They are
            appended to `sens._gloss`, which is created if missing.
            `sens` is left untouched when nothing was produced.
        xlit: when true, gloss text containing "lit:" is handed to
            extract_lit(), which returns the remaining gloss text and a
            list of literal-translation strings.
        xlang: optional container of language ids.  When non-empty,
            ordinary ('equ') glosses in other languages are dropped.

    Problems (unknown language, non-latin text, duplicates) are reported
    through self.warn(); nothing is returned.
    """
    XKW = self.XKW
    lang_attr = '{http://www.w3.org/XML/1998/namespace}lang'
    glosses = []
    lits = []
    dupchk = {}

    for elem in elems:
        lng = elem.get(lang_attr)
        try:
            # A missing xml:lang attribute means English.
            lang = XKW.LANG[lng].id if lng else XKW.LANG['eng'].id
        except KeyError:
            self.warn("Invalid gloss lang attribute: '%s'" % lng)
            continue

        txt = elem.text
        # Only a warning: a gloss that fails this check is still kept.
        if not jdb.jstr_gloss(txt):
            self.warn("gloss text '%s' not latin characters." % txt)

        lit = []
        # elem.text is None for an empty element; guard so that the
        # substring test cannot raise TypeError.
        if xlit and txt and 'lit:' in txt:
            txt, lit = extract_lit(txt)

        # NOTE(review): jdb.unique() is assumed to record the key in
        # dupchk and return false when it was already there -- confirm.
        if not jdb.unique((lang, txt), dupchk):
            self.warn("Duplicate lang/text in gloss '%s'/'%s'" % (lng, txt))
            continue

        # (entr,sens,gloss,lang,txt)
        if txt and (not xlang or lang in xlang):
            glosses.append(
                jdb.Gloss(lang=lang, ginf=XKW.GINF['equ'].id, txt=txt))
        # Literal glosses are kept regardless of the xlang filter.
        if lit:
            lits.extend(
                jdb.Gloss(lang=lang, ginf=XKW.GINF['lit'].id, txt=x)
                for x in lit)

    if glosses or lits:
        if not hasattr(sens, '_gloss'):
            sens._gloss = []
        # Ordinary glosses first, then all literal ones.
        sens._gloss.extend(glosses + lits)
def bld_sens(sens, glosses):
    """Populate a sense record from parsed gloss items.

    Args:
        sens: the sense record to fill in.  Its `_gloss` list is
            replaced with a fresh list.
        glosses: iterable of 2-tuples `(text, tags)`.  `text` is the
            gloss text; `tags` is a list of sense tag items (may be
            empty).  Each tag item is an n-tuple whose first member is
            the tag type as a string ('KINF', 'POS', 'lsrc', ...) or
            None when the input gave a bare keyword such as "vi"
            instead of "pos=vi"; the remaining members depend on the
            tag type.

    Each gloss is appended to `sens._gloss`; its tags are handed to
    sens_tags(), which distributes them onto the appropriate lists of
    the sense and returns any error messages.

    Returns:
        All error messages joined with newlines ("" when there were
        none).
    """
    keywords = jdb.KW
    problems = []
    sens._gloss = []

    for raw_text, tag_items in glosses:
        record = jdb.Gloss(txt=jellex.gcleanup(raw_text))
        sens._gloss.append(record)

        if tag_items:
            problems += sens_tags(sens, record, tag_items)

        # Fill in defaults for anything the tags did not set.
        if gloss_unset(record.ginf):
            record.ginf = keywords.GINF['equ'].id
        if gloss_unset(record.lang):
            record.lang = keywords.LANG['eng'].id

    return "\n".join(problems)


def gloss_unset(value):
    """Return True when a gloss attribute has not been assigned yet."""
    return value is None
def doedit(entr, hist, cmd):
    """Apply the change described by `cmd` to `entr` and/or `hist`.

    Args:
        entr: the jdb.Entr() instance to be edited.
        hist: the jdb.Hist instance that is edited when the command
            adds a comment or refs.
        cmd: a Cmd instance describing the change.  `cmd.operand`
            selects what is edited; depending on the operand `cmd.new`,
            `cmd.old`, `cmd.sens` and `cmd.cmd` are read as well.

    Returns:
        True.  It should be False when neither `entr` nor `hist` was
        actually changed, but that is not currently detected.

    Raises:
        ValueError: `cmd.operand` is not a recognised operand.
    """
    operand = cmd.operand

    if operand == 'comment':
        hist.notes = cmd.new

    elif operand == 'refs':
        hist.refs = cmd.new

    elif operand == 'entr':
        # The only entry-level command acted on is deletion.
        if cmd.cmd == 'del':
            entr.stat = jdb.KW.STAT['D'].id

    elif operand in ('kanj', 'rdng'):
        target = getattr(entr, '_' + operand)
        replacement = None
        if cmd.new:
            make = jdb.Kanj if operand == 'kanj' else jdb.Rdng
            replacement = make(txt=cmd.new)
        edit(target, 'txt', cmd.old, replacement or cmd.new,
             operand, cmd.old, cmd.new)

    elif operand == 'gloss':
        # cmd.sens is used as a 1-based sense number.
        sense = entr._sens[cmd.sens - 1]
        target = getattr(sense, '_' + operand)
        replacement = None
        if cmd.new:
            replacement = jdb.Gloss(txt=cmd.new,
                                    lang=jdb.KW.LANG['eng'].id,
                                    ginf=jdb.KW.GINF['equ'].id)
        edit(target, 'txt', cmd.old, replacement or cmd.new,
             operand, cmd.old, cmd.new)

    elif operand in ('pos', 'misc', 'fld', 'dial'):
        sense = entr._sens[cmd.sens - 1]
        target = getattr(sense, '_' + operand)
        # Tag lists are matched on keyword ids, so convert first.
        new_id, old_id = kw2id(operand, cmd.new, cmd.old)
        edit(target, 'kw', old_id, new_id, operand, cmd.old, cmd.new)

    else:
        raise ValueError(operand)

    return True  # FIXME: how to determine if no change was made to entry?
def mkentr(jtxt, etxt):
    """Create an entry object for the "A" line of an example sentence.

    Args:
        jtxt: the Japanese text.  It is stored as the entry's single
            reading when jdb.jstr_reb() accepts it, otherwise as its
            single kanji item.
        etxt: the English text, stored as the only gloss of the
            entry's only sense.

    Returns:
        The new jdb.Entr.  Its `srcnote` is the string form of the
        module-level `Lnnum` at the time of the call.
    """
    entry = jdb.Entr(stat=KW.STAT_A, unap=False)
    # Lnnum is only read here, so no `global` declaration is required.
    entry.srcnote = str(Lnnum)

    is_reading = jdb.jstr_reb(jtxt)
    if not is_reading:
        entry._kanj = [jdb.Kanj(txt=jtxt)]
    else:
        entry._rdng = [jdb.Rdng(txt=jtxt)]

    english = jdb.Gloss(txt=etxt, ginf=KW.GINF_equ, lang=KW.LANG_eng)
    entry._sens = [jdb.Sens(_gloss=[english])]
    return entry
def mkentr(jtxt, etxt, kwds):
    """Create an entry object for the "A" line of an example sentence.

    Args:
        jtxt: the Japanese text.  It is stored as the entry's single
            reading when jdb.jstr_reb() accepts it, otherwise as its
            single kanji item.
        etxt: the English text, stored as the only gloss of the
            entry's only sense.
        kwds: sequence of items, each a sequence holding a keyword id
            and, optionally, a note string as its second member.

    Returns:
        The new jdb.Entr.  Its sense carries one Misc record per item
        of `kwds`, and the available note strings joined with "; " as
        its notes (None when there are no notes).  `srcnote` is the
        string form of the module-level `Lnnum` at the time of the call.
    """
    entry = jdb.Entr(stat=KW.STAT_A, unap=False)
    # Lnnum is only read here, so no `global` declaration is required.
    entry.srcnote = str(Lnnum)

    kw_ids = [item[0] for item in kwds]
    # Items without a second member simply contribute no note.
    note_parts = [item[1] for item in kwds if len(item) > 1]
    sens_note = "; ".join(note_parts) or None

    is_reading = jdb.jstr_reb(jtxt)
    if not is_reading:
        entry._kanj = [jdb.Kanj(txt=jtxt)]
    else:
        entry._rdng = [jdb.Rdng(txt=jtxt)]

    english = jdb.Gloss(lang=KW.LANG_eng, ginf=KW.GINF_equ, txt=etxt)
    misc_tags = [jdb.Misc(kw=kw_id) for kw_id in kw_ids]
    entry._sens = [jdb.Sens(notes=sens_note, _gloss=[english],
                            _misc=misc_tags)]
    return entry
def rmgroup(rmg, langs=None):
    """Parse the <reading> and <meaning> children of one group element.

    Args:
        rmg: element whose 'reading' and 'meaning' children are read
            (needs `.findall()`; children need `.items()`, `.get()`
            and `.text`).
        langs: optional container of language keyword ids.  When
            non-empty, meanings in other languages are dropped.

    Returns:
        A 3-tuple `(rdngs, glosses, cinf)`:
          rdngs   -- jdb.Rdng records for 'ja_on' / 'ja_kun' readings,
                     each with Rinf records for its type and, when
                     present, its r_status.
          glosses -- jdb.Gloss records for the meanings.
          cinf    -- jdb.Cinf records for 'pinyin', 'korean_r',
                     'korean_h' and 'vietnam' readings.

    Duplicate readings or meanings are reported with warn() and
    skipped.

    Raises:
        KeyError: a reading has a missing or unrecognised type.
    """
    rdngs = []
    glosses = []
    cinf = []

    seen = set()
    for x in rmg.findall('reading'):
        rtype = None
        rstat = None
        # Whichever of r_type / on_type comes last in attribute order
        # determines the reading type.
        for aname, aval in x.items():
            if aname in ('r_type', 'on_type'):
                rtype = aval
            elif aname == 'r_status':
                rstat = aval

        if rtype in ('pinyin', 'korean_r', 'korean_h', 'vietnam'):
            if (rtype, x.text) in seen:
                warn("Duplicate reading ignored: %s, %s" % (rtype, x.text))
                continue
            seen.add((rtype, x.text))
            cinf.append(jdb.Cinf(kw=KW.CINF[rtype].id, value=x.text))
        elif rtype in ('ja_on', 'ja_kun'):
            # Japanese readings are de-duplicated on text alone.
            if x.text in seen:
                warn('Duplicate reading ignored: %s' % x.text)
                continue
            seen.add(x.text)
            # Use rtype, not the attribute-loop variable: after the loop
            # that variable holds whichever attribute was iterated last
            # (e.g. the r_status value), not necessarily the type.
            inf = [jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rtype, rtype)].id)]
            if rstat:
                inf.append(
                    jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rstat, rstat)].id))
            rdngs.append(jdb.Rdng(txt=x.text, _inf=inf))
        else:
            raise KeyError('Unkown r_type attribute: %s' % rtype)

    seen = set()
    for x in rmg.findall('meaning'):
        # A meaning without m_lang is treated as 'en'.
        lang = x.get('m_lang', 'en')
        langkw = KW.LANG[Xml2db.LANG.get(lang, lang)].id
        if (lang, x.text) in seen:
            warn("Duplicate lang,meaning pair ignored: %s:%s" % (lang, x.text))
            continue
        seen.add((lang, x.text))
        if not langs or langkw in langs:
            # NOTE(review): ginf=1 is presumably the id of the 'equ'
            # GINF keyword -- confirm against the keyword table.
            glosses.append(jdb.Gloss(txt=x.text, lang=langkw, ginf=1))

    return rdngs, glosses, cinf