Ejemplo n.º 1
0
def variant(x):
    """Build a jdb.Cinf record from a variant element.

    The element's 'var_type' attribute selects the keyword and the
    element's text becomes the record's value.
    """
    # A few var_type names are spelled differently in the database
    # kw* tables; translate those before the keyword lookup.
    renamed = {'njecd': 'halpern_njecd', 'oneill': 'oneill_names'}

    var_type = x.get('var_type')
    var_type = renamed.get(var_type, var_type)
    if var_type != 'ucs':
        kw_id = KW.CINF[Xml2db.CINF.get(var_type, var_type)].id
    else:
        # 'ucs' is not looked up in KW.CINF; it is stored with kw 0.
        kw_id = 0
    return jdb.Cinf(kw=kw_id, value=x.text)
Ejemplo n.º 2
0
def radical(x, c):
    """Record the radical number carried by a rad_value element.

    x -- element whose text is the radical number and whose single
         expected attribute, 'rad_type', names the radical system.
    c -- character record being built.  A 'classical' radical is
         stored in c.bushu; a 'nelson_c' radical is appended to
         c._cinf as a jdb.Cinf record.

    Anomalies (attribute count other than one, an attribute not named
    'rad_type', an unrecognised rad_type value) are reported through
    warn() and do not stop processing.  An element with no attributes
    at all raises IndexError, and errors from int() on the element
    text propagate to the caller.
    """
    cinf = c._cinf
    attrs = list(x.items())
    if len(attrs) != 1:
        warn('Expected only one rad_value attribute')
    rad_attr, rad_type = attrs[0]
    if rad_attr != 'rad_type':
        # Format the message before calling warn(), as the other
        # warn() callers in this file do, instead of passing the value
        # as an extra positional argument.
        warn('Unexpected rad_value attribute: %s' % rad_attr)
    if rad_type == 'classical':
        c.bushu = int(x.text)
    elif rad_type == 'nelson_c':
        cinf.append(jdb.Cinf(kw=KW.CINF_nelson_rad, value=int(x.text)))
    else:
        # The original passed the tuple as a second argument, so the
        # placeholders were never filled in.
        warn("Unknown radical attribute value: %s=\"%s\""
             % (rad_attr, rad_type))
Ejemplo n.º 3
0
def codepoint(x, c, chtxt):
    """Process a cp_value element for the character chtxt.

    x     -- element whose text is the code point value and whose
             single expected attribute, 'cp_type', names the coding
             system.
    c     -- character record being built; non-'ucs' code points are
             appended to c._cinf as jdb.Cinf records.
    chtxt -- the character text the element belongs to.

    A 'ucs' value is not stored: its text is parsed as hexadecimal and
    only compared with jdb.uord(chtxt), with a warning on mismatch.
    Any other cp_type is mapped through Xml2db.CINF and looked up in
    KW.CINF.  Attribute anomalies are reported through warn().  An
    element with no attributes at all raises IndexError.
    """
    cinf = c._cinf
    attrs = list(x.items())
    if len(attrs) != 1:
        warn('Expected only one cp_value attribute')
    cp_attr, cp_type = attrs[0]
    if cp_attr != 'cp_type':
        # Put the offending attribute name into the message itself; the
        # original passed it as a stray second argument to warn().
        warn('Unexpected cp_value attribute: %s' % cp_attr)
    if cp_type == 'ucs':
        expected = jdb.uord(chtxt)
        if int(x.text, 16) != expected:
            warn("xml codepoint ucs value '%s' doesnt match character %s (0x%x)."
                 % (x.text, chtxt, expected))
    else:
        kw = KW.CINF[Xml2db.CINF.get(cp_type, cp_type)].id
        cinf.append(jdb.Cinf(kw=kw, value=x.text))
Ejemplo n.º 4
0
def rmgroup(rmg, langs=None):
    """Extract readings, meanings and reading-derived cinf records.

    rmg   -- element containing 'reading' and 'meaning' child elements.
    langs -- optional collection of language keyword ids.  When empty
             or None every meaning is kept; otherwise only meanings
             whose language keyword id is in langs are kept.

    Returns a 3-tuple (rdngs, glosses, cinf):
      rdngs   -- jdb.Rdng objects for 'ja_on' / 'ja_kun' readings, each
                 with an Rinf for the reading type and, when an
                 'r_status' attribute is present, one for the status.
      glosses -- jdb.Gloss objects for the kept meanings ('m_lang'
                 defaults to 'en').
      cinf    -- jdb.Cinf objects for 'pinyin', 'korean_r', 'korean_h'
                 and 'vietnam' readings.

    Duplicate readings and duplicate (lang, meaning) pairs are skipped
    with a warning.  Raises KeyError for any other reading type,
    including a reading with no type attribute.
    """
    rdngs = []
    glosses = []
    cinf = []

    seen = set()
    for x in rmg.findall('reading'):
        rtype = None
        rstat = None
        for aname, aval in x.items():
            # NOTE(review): an 'on_type' attribute overwrites rtype just
            # like 'r_type' does (unchanged from the original) -- confirm
            # that this is intended.
            if aname in ('r_type', 'on_type'):
                rtype = aval
            elif aname == 'r_status':
                rstat = aval
        if rtype in ('pinyin', 'korean_r', 'korean_h', 'vietnam'):
            if (rtype, x.text) in seen:
                warn("Duplicate reading ignored: %s, %s" % (rtype, x.text))
                continue
            seen.add((rtype, x.text))
            cinf.append(jdb.Cinf(kw=KW.CINF[rtype].id, value=x.text))
        elif rtype == 'ja_on' or rtype == 'ja_kun':
            if x.text in seen:
                warn('Duplicate reading ignored: %s' % x.text)
                continue
            seen.add(x.text)
            rdng = jdb.Rdng(txt=x.text, _inf=[])
            # Use rtype, not the attribute loop's leftover variable: that
            # holds the value of whichever attribute came last, which is
            # the status when 'r_status' follows 'r_type'.
            rdng._inf.append(
                jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rtype, rtype)].id))
            if rstat:
                rdng._inf.append(
                    jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rstat, rstat)].id))
            rdngs.append(rdng)
        else:
            raise KeyError('Unkown r_type attribute: %s' % rtype)

    seen = set()
    for x in rmg.findall('meaning'):
        lang = x.get('m_lang', 'en')
        langkw = KW.LANG[Xml2db.LANG.get(lang, lang)].id
        # Duplicates are detected before the language filter is applied.
        if (lang, x.text) in seen:
            warn("Duplicate lang,meaning pair ignored: %s:%s" % (lang, x.text))
            continue
        seen.add((lang, x.text))
        if not langs or langkw in langs:
            glosses.append(jdb.Gloss(txt=x.text, lang=langkw, ginf=1))
    return rdngs, glosses, cinf
Ejemplo n.º 5
0
def dicnum(dic_number, cinf):
    """Append a jdb.Cinf record to cinf for each 'dic_ref' child.

    dic_number -- element containing 'dic_ref' child elements.
    cinf       -- list that receives the new records (modified in place).

    The record value is the element text, or "vol.page.text" when the
    element has a non-empty 'm_vol' attribute.  The 'dr_type' attribute
    is mapped through Xml2db.CINF and looked up in KW.CINF; an unknown
    keyword, or a (keyword, value) pair already seen in this call, is
    skipped with a warning.
    """
    seen = set()
    for ref in dic_number.findall('dic_ref'):
        drtype = ref.get('dr_type')
        if ref.get('m_vol'):
            val = "%s.%s.%s" % (ref.get('m_vol'), ref.get('m_page'), ref.text)
        else:
            val = ref.text
        key = Xml2db.CINF.get(drtype, drtype)
        try:
            kw = KW.CINF[key].id
        except KeyError:
            warn('Unknown CINF keyword: "%s"' % key)
            continue
        entry = (kw, val)
        if entry in seen:
            warn('Duplicate dr_type,value pair ignored: %s, %s' %
                 (drtype, val))
            continue
        seen.add(entry)
        cinf.append(jdb.Cinf(kw=kw, value=val))
Ejemplo n.º 6
0
def qcode(query_code, cinf):
    """Append a jdb.Cinf record to cinf for each 'q_code' child.

    query_code -- element containing 'q_code' child elements.
    cinf       -- list that receives the new records (modified in place).

    The 'qc_type' attribute is mapped through Xml2db.CINF and looked up
    in KW.CINF; the 'skip_misclass' attribute (default '') is stored as
    the record's mctype.  A (keyword, value) pair already seen in this
    call is skipped with a warning.  Raises KeyError when
    'skip_misclass' appears on an element whose qc_type is not "skip".
    A warning is issued at the end if a misclassification code was seen
    but no plain skip code was.
    """
    seen = set()
    have_misclass = False
    have_skip = False
    for elem in query_code.findall('q_code'):
        qctype = elem.get('qc_type')
        val = elem.text
        kw = KW.CINF[Xml2db.CINF.get(qctype, qctype)].id
        misclass = elem.get('skip_misclass', '')
        pair = (kw, val)
        if pair in seen:
            warn('Duplicate qc_type,value pair ignored: %s,%s' % (qctype, val))
            continue
        seen.add(pair)
        if not misclass:
            if qctype == 'skip':
                have_skip = True
        else:
            if qctype != "skip":
                raise KeyError("'skip_misclass' attr on non-skip element")
            have_misclass = True
        cinf.append(jdb.Cinf(kw=kw, value=val, mctype=misclass))
    if have_misclass and not have_skip:
        warn("Has skip_misclass but no skip")
Ejemplo n.º 7
0
def strokes(x, n, c):
    """Store the stroke count held in element x on character record c.

    x -- element whose text is the stroke count.
    n -- index of this element; 0 selects the c.strokes attribute,
         any other value adds a jdb.Cinf record to c._cinf instead.
    c -- character record being built.
    """
    extra = c._cinf
    if n != 0:
        # Counts after the first are kept as cinf records.
        extra.append(jdb.Cinf(kw=KW.CINF_strokes, value=int(x.text)))
        return
    c.strokes = int(x.text)