Ejemplo n.º 1
0
def rmgroup(rmg, langs=None):
    """Parse one reading/meaning group element.

    Args:
        rmg: Element whose 'reading' and 'meaning' children are parsed
            (accessed through findall(), items(), get() and .text).
        langs: Optional collection of language keyword ids.  When it is
            empty or None every meaning is kept; otherwise only meanings
            whose language id is contained in it are kept.

    Returns:
        A 3-tuple ``(rdngs, glosses, cinf)``:
          * rdngs -- jdb.Rdng objects built from 'ja_on'/'ja_kun' readings,
          * glosses -- jdb.Gloss objects built from the meanings,
          * cinf -- jdb.Cinf objects built from 'pinyin', 'korean_r',
            'korean_h' and 'vietnam' readings.

    Raises:
        KeyError: if a reading's type is missing or not one of the
            recognized values.
    """
    rdngs = []
    glosses = []
    cinf = []

    seen = set()
    for x in rmg.findall('reading'):
        rtype = None
        rstat = None
        # Both 'r_type' and 'on_type' feed the reading type; whichever
        # attribute is iterated last wins, as before.
        for aname, aval in x.items():
            if aname in ('r_type', 'on_type'):
                rtype = aval
            elif aname == 'r_status':
                rstat = aval

        if rtype in ('pinyin', 'korean_r', 'korean_h', 'vietnam'):
            key = (rtype, x.text)
            if key in seen:
                warn("Duplicate reading ignored: %s, %s" % (rtype, x.text))
                continue
            seen.add(key)
            cinf.append(jdb.Cinf(kw=KW.CINF[rtype].id, value=x.text))
        elif rtype in ('ja_on', 'ja_kun'):
            if x.text in seen:
                warn('Duplicate reading ignored: %s' % x.text)
                continue
            seen.add(x.text)
            # Tag the reading with its type.  This must use 'rtype', not
            # the leftover attribute-loop variable, which holds whatever
            # attribute value happened to be iterated last.
            inf = [jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rtype, rtype)].id)]
            if rstat:
                inf.append(
                    jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rstat, rstat)].id))
            rdngs.append(jdb.Rdng(txt=x.text, _inf=inf))
        else:
            raise KeyError('Unkown r_type attribute: %s' % rtype)

    seen = set()
    for x in rmg.findall('meaning'):
        lang = x.get('m_lang', 'en')
        # Resolved before the duplicate check so that an unknown language
        # fails even on a repeated meaning, as before.
        langkw = KW.LANG[Xml2db.LANG.get(lang, lang)].id
        key = (lang, x.text)
        if key in seen:
            warn("Duplicate lang,meaning pair ignored: %s:%s" % (lang, x.text))
            continue
        seen.add(key)
        if not langs or langkw in langs:
            glosses.append(jdb.Gloss(txt=x.text, lang=langkw, ginf=1))
    return rdngs, glosses, cinf
Ejemplo n.º 2
0
def bld_rdng(r, taglist=None):
    """Apply a list of parsed tags to the reading object *r*.

    Args:
        r: Reading object that is updated in place (attributes '_inf',
            '_FREQ', '_RESTR' and '_NOKANJI' may be set on it).
        taglist: Optional iterable of tag sequences.  Each tag's first
            item is its type ('RINF', 'FREQ', 'RESTR', another type name,
            or None when the type must be resolved with lookup_tag());
            the remaining items are the tag's values.  The tag sequences
            are not modified.

    Returns:
        A string with one error message per line; empty when no problems
        were found.
    """
    errs = []
    nokanj = False
    restr_conflict = "Can't use both kanji and \"nokanji\" in 'restr' tags"
    for tag in taglist or ():
        # Slice rather than pop() so the caller's tag lists stay intact.
        typ, t = tag[0], tag[1:]
        if typ is None:
            v = lookup_tag(t[0], ('RINF', 'FREQ'))
            if not v:
                errs.append("Unknown reading tag '%s'" % t[0])
            else:
                # Use the first match: its type followed by its values.
                typ, t = v[0][0], v[0][1:]
        if typ == 'RINF':
            append(r, '_inf', jdb.Rinf(kw=t[0]))
        elif typ == 'FREQ':
            # _freq objects are referenced by both the reading and kanji
            # _freq lists.  Since the kanji are not available here, the
            # freq (kw, value) tuple is saved temporarily in attribute
            # "._FREQ".  When the full entry is processed that info is
            # removed, merged with the parallel info from the kanji
            # objects, and proper ._freq objects are created.
            append(r, '_FREQ', (t[0], t[1]))
        elif typ == 'RESTR':
            # Real restr records list the disallowed kanji, but only the
            # allowed kanji are known here; computing the disallowed set
            # needs all of the entry's kanji.  So the allowed kanji is
            # saved as given and converted after the full entry is built.
            # The tag values are (reading text, kanji text); a reading
            # restriction is expected to carry only a kanji text.
            rtxt, ktxt = t
            if rtxt == "nokanji":
                nokanj = True
                r._NOKANJI = 1
                # Report the conflict also when the kanji restriction
                # came before the "nokanji" tag.
                if hasattr(r, '_RESTR'):
                    errs.append(restr_conflict)
                continue
            if rtxt:
                errs.append("Reading restrictions must be kanji only: " + rtxt)
            append(r, "_RESTR", ktxt)
            if hasattr(r, '_RESTR') and nokanj:
                errs.append(restr_conflict)
        elif typ:
            errs.append("Cannot use '%s' tag in a reading" % typ)
    return "\n".join(errs)
Ejemplo n.º 3
0
def reading_meaning(rm, rdng, sens, cinf, langs):
    """Parse a reading/meaning element into the caller's result lists.

    Args:
        rm: Element whose 'rmgroup' and 'nanori' children are parsed.
        rdng: List extended in place with the jdb.Rdng objects found.
        sens: List to which one jdb.Sens is appended per 'rmgroup'.
        cinf: List extended in place with the cinf records of every
            'rmgroup'.
        langs: Passed through to rmgroup() to filter meanings.

    Returns:
        None; results are delivered through *rdng*, *sens* and *cinf*.
    """
    KW_NANORI = KW.RINF[Xml2db.RINF.get('nanori', 'nanori')].id
    for x in rm.findall('rmgroup'):
        r, g, c = rmgroup(x, langs)
        rdng.extend(r)
        sens.append(jdb.Sens(_gloss=g))
        # Collect per group: doing this after the loop would keep only
        # the last group's records and fail when there is no rmgroup.
        cinf.extend(c)

    # Index the readings parsed so far by their text.
    rlookup = {r.txt: r for r in rdng}

    for x in rm.findall('nanori'):
        # A nanori reading may equal an on/kun reading that was already
        # parsed.  In that case the existing reading just gets a nanori
        # tag; otherwise a new reading record is created.
        r = rlookup.get(x.text)
        if r is None:
            r = jdb.Rdng(txt=x.text)
            rdng.append(r)
            rlookup[r.txt] = r
        elif any(i.kw == KW_NANORI for i in getattr(r, '_inf', [])):
            # Already tagged as nanori, so this one is a duplicate.
            warn('Duplicate nanori reading: "%s"' % x.text)
            continue
        if not hasattr(r, '_inf'):
            r._inf = []
        r._inf.append(jdb.Rinf(kw=KW_NANORI))