Example #1
0
def p_tagitem_10(p):
    '''tagitem : TEXT EQL jrefs'''
    # NOTE: the docstring above is the grammar production for this rule
    # (presumably consumed by the parser generator -- confirm); it must
    # stay byte-identical and cannot hold ordinary documentation.
    #
    # Handles a "tag=jrefs" item.  p[1] is the tag text and p[3] is an
    # iterable of jref items, each a (dotlist, slist, seq, corpus)
    # 4-sequence.  The result, stored in p[0], is a list of tag lists:
    #   ['XREF', tag, rdng, kanj, slist, seq, corpus]  for xref tags,
    #   ['RESTR', rdng, kanj]                          for "restr=...",
    #   whatever tag_eql_text() returns                for anything else.
    tag = p[1]
    taglist = []
    # Whether 'tag' is an xref keyword does not depend on the jref being
    # processed, so decide it once rather than rebuilding the keyword
    # list on every iteration.
    is_xref = tag in [x.kw for x in jdb.KW.recs('XREF')]
    for jref in p[3]:
        dotlist, slist, seq, corpus = jref
        if is_xref:
            # FIXME: instead of using XREF kw's directly, do we want to
            #  change to an lsrc syntax like, "xref=cf:..."
            #  (possibly keeping "see" and "ant" as direct keywords)?
            if len(dotlist) == 1:
                # A single string is either the kanji or the reading;
                # put it in the matching slot and leave the other None.
                if jdb.jstr_keb(dotlist[0]):
                    taglist.append(
                        ['XREF', tag, None, dotlist[0], slist, seq, corpus])
                else:
                    taglist.append(
                        ['XREF', tag, dotlist[0], None, slist, seq, corpus])
            elif len(dotlist) == 2:
                # Two strings: the first goes in the kanji slot and the
                # second in the reading slot.
                taglist.append(
                    ['XREF', tag, dotlist[1], dotlist[0], slist, seq, corpus])
            elif len(dotlist) == 0:
                taglist.append(['XREF', tag, None, None, slist, seq, corpus])
            else:
                perror(
                    "No more than one kanji and one reading string can be given in an xref."
                )
            continue
        # The full 'jref' syntax is only used by xrefs (above)
        # so if we get here, complain if the 'jref' item has
        # any xref-specific elements.
        if seq or corpus or slist:
            perror(
                "Seq number, corpus, or a sense list can only be given with xref tags"
            )
        # Xrefs are also the only construct that uses the middot character
        # syntactically.  Since we don't have an xref, the middots are
        # just characters in the text, so put the original text string back
        # together.
        txt = u'\u30FB'.join(dotlist)
        if tag == 'restr':
            if jdb.jstr_keb(txt):
                taglist.append(['RESTR', None, txt])
            else:
                taglist.append(['RESTR', txt, None])
        else:
            # This must be a tag=QTEXT construct.
            taglist.append(tag_eql_text(p, tag, txt))
    p[0] = taglist
Example #2
0
def p_tagitem_4(p):
    '''tagitem : QTEXT'''
    # The docstring above is the grammar production for this rule and
    # must be left exactly as written.
    #
    # A bare QTEXT item is treated as a restriction: the first and last
    # characters of the token are dropped (presumably the enclosing
    # quotes -- confirm against the lexer) and the remainder is passed
    # through jellex.qcleanup().
    # FIXME: why isn't a QTEXT already cleaned up by jellex?
    cleaned = jellex.qcleanup(p[1][1:-1])
    # FIXME: we should check for ascii text here and treat
    #  that as TEXT above.
    if jdb.jstr_keb(cleaned):
        # Kanji text goes in the last slot.
        restr = ['RESTR', None, cleaned]
    else:
        # Anything else goes in the middle slot.
        restr = ['RESTR', cleaned, None]
    p[0] = [restr]
Example #3
0
def parse_kitem(ktxt, tags, fmap):
    """Build a Kanj object from kanji text and its tag strings.

    'ktxt' must satisfy jdb.jstr_keb() or a ParseError is raised.
    Each non-empty item of 'tags' is resolved with lookup_tag()
    against the 'KINF' and 'FREQ' tag types:
      - a KINF result appends a Kinf to the new object's _inf list;
      - a FREQ result appends the new Kanj to fmap[<key>][1], where
        the key is the lookup result minus its first element.
    A tag that lookup_tag() cannot resolve raises ParseError.

    Returns the new Kanj object.
    """
    if not jdb.jstr_keb(ktxt):
        raise ParseError('Kanji field not kanji: "%s".' % ktxt)

    kanj = Kanj(txt=ktxt)
    for tag in tags:
        if not tag:
            # Skip empty tag strings.
            continue
        found = lookup_tag(tag, ['KINF', 'FREQ'])
        if not found:
            raise ParseError('Unknown tag "%s" on kanji "%s"' % (tag, ktxt))
        kind, value = found[0]
        if kind == 'KINF':
            kanj._inf.append(Kinf(kw=value))
        elif kind == 'FREQ':
            # Only register the kanji in the frequency map here.
            fmap[found[1:]][1].append(kanj)
    return kanj
Example #4
0
def check_for_warnings(cur, entr, parent_seq, chklist):
    """Fill 'chklist' (a dict, modified in place) with warnings for 'entr'.

    Keys that may be set:
      'dups'    - result of find_similar(): other entries that have the
                  same kanji or reading.  These are shown as cautions at
                  the top of the confirmation form in hopes of reducing
                  submissions of words already in the database.
      'norebs'  - True if a jmdict entry has no readings.
      'invkebs' - comma-separated kanji texts that fail jdb.jstr_keb().
      'invrebs' - comma-separated reading texts that fail jdb.jstr_reb().
      'nopos'   - comma-separated (1-based) numbers of senses without a
                  part-of-speech; jmdict entries only.
      'jpgloss' - glosses containing characters in U+FF01..U+FF5D,
                  listed as "sense.gloss: "c", "c", ...".

    'parent_seq' is used by find_similar() to exclude other entries
    with the same seq# from being flagged as having duplicate kanji
    or readings.  Nothing is returned.
    """
    similar = find_similar(cur, getattr(entr, '_kanj', []),
                           getattr(entr, '_rdng', []), entr.src, parent_seq)
    if similar:
        chklist['dups'] = similar

    # FIXME: IS-190.
    if not getattr(entr, '_rdng', None):
        if entr.src == jdb.KW.SRC['jmdict'].id:
            chklist['norebs'] = True

    # FIXME: Should pass list of the kanj/rdng text rather than
    #   a pre-joined string so that page can present the list as
    #   it wishes.
    bad_kanji = [k.txt for k in getattr(entr, '_kanj', [])
                 if not jdb.jstr_keb(k.txt)]
    chklist['invkebs'] = ", ".join(bad_kanji)
    bad_readings = [r.txt for r in getattr(entr, '_rdng', [])
                    if not jdb.jstr_reb(r.txt)]
    chklist['invrebs'] = ", ".join(bad_readings)

    # FIXME: IS-190.
    if entr.src == jdb.KW.SRC['jmdict'].id:
        senses_without_pos = []
        for sens_num, sens in enumerate(getattr(entr, '_sens', []), 1):
            if not sens._pos:
                senses_without_pos.append(str(sens_num))
        chklist['nopos'] = ", ".join(senses_without_pos)

    flagged_glosses = []
    for sens_num, sens in enumerate(getattr(entr, '_sens', []), 1):
        for gloss_num, gloss in enumerate(getattr(sens, '_gloss', []), 1):
            # Change text in edconf.tal if charset changed.
            hits = re.findall('[\uFF01-\uFF5D]', gloss.txt)
            if not hits:
                continue
            quoted = '"' + '", "'.join(hits) + '"'
            flagged_glosses.append("%d.%d: %s" % (sens_num, gloss_num, quoted))
    chklist['jpgloss'] = ", ".join(flagged_glosses)

    # Remove any empty warnings so that if there are no warnings,
    # 'chklist' itself will be empty and no warning span element
    # will be produced by the template (which otherwise will
    # contain a <hr/> even if there are no other warnings.)
    empty_keys = [key for key in chklist.keys() if not chklist[key]]
    for key in empty_keys:
        del chklist[key]
Example #5
0
 def do_kanjs (self, elems, entr, fmap):
     """Convert kanji elements into jdb.Kanj objects on 'entr'.

     'elems' is an iterable of elements, each with a 'keb' child and
     optional 'ke_inf' / 'ke_pri' children; None means there is nothing
     to do.  For each element:
       - a keb text already seen is warned about and skipped;
       - a keb text failing jdb.jstr_keb() is warned about but kept;
       - 'ke_inf' children are handed to self.do_kws();
       - each 'ke_pri' child that self.parse_freq() accepts registers
         the kanji in fmap[freqtuple][1], with a warning for repeats.
     The resulting list is stored in entr._kanj only if non-empty.
     """
     if elems is None:
         return
     seen = {}
     collected = []
     for position, elem in enumerate (elems, 1):
         keb = elem.find('keb').text
         if not jdb.unique (keb, seen):
             self.warn ("Duplicate keb text: '%s'" % keb)
             continue
         if not jdb.jstr_keb (keb):
             self.warn ("keb text '%s' not kanji." % keb)
         # Numbering follows the element's position, so skipped
         # duplicates leave gaps.
         kanj = jdb.Kanj (kanj=position, txt=keb)
         self.do_kws (elem.findall('ke_inf'), kanj, '_inf', 'KINF')
         for pri in elem.findall ('ke_pri'):
             freqtuple = self.parse_freq (pri.text, "ke_pri")
             if not freqtuple:
                 continue
             klist = fmap[freqtuple][1]
             if jdb.isin (kanj, klist):
                 self.freq_warn ("Duplicate", None, kanj, pri.text)
             else:
                 klist.append (kanj)
         collected.append (kanj)
     if collected:
         entr._kanj = collected
Example #6
0
def parse_stags(tag, sens, kanjs, rdngs):
    """Apply a comma-separated sense restriction list to 'sens'.

    'tag' is split on commas and each word is stripped.  A word that
    satisfies jdb.jstr_reb() is a reading restriction; otherwise one
    that satisfies jdb.jstr_keb() is a kanji restriction; anything
    else raises ParseError.  The reading words are then passed to
    jdb.txt2restr() with 'rdngs' and the '_stagr' attribute, and the
    kanji words with 'kanjs' and '_stagk'.  If txt2restr() reports any
    words through its 'bad' list, ParseError is raised.

    Returns None.
    """
    reading_words = []
    kanji_words = []
    for word in (piece.strip() for piece in tag.split(',')):
        # The reading test comes first.
        if jdb.jstr_reb(word):
            reading_words.append(word)
        elif jdb.jstr_keb(word):
            kanji_words.append(word)
        else:
            raise ParseError(
                'stagx restriction word neither reading or kanji: "%s"' % word)

    # Readings are processed (and validated) before kanji.
    passes = (
        (reading_words, rdngs, '_stagr', 'Stagr text not in readings: "%s"'),
        (kanji_words, kanjs, '_stagk', 'Stagk text not in kanji: "%s"'),
    )
    for words, targets, attr, message in passes:
        unmatched = []
        jdb.txt2restr(words, sens, targets, attr, bad=unmatched)
        if unmatched:
            raise ParseError(message % '","'.join(unmatched))
Example #7
0
 def check (_, expected, testtext):
     """Assert that jdb.jstr_keb(testtext) equals 'expected'."""
     # '_' is the instance this method is bound to.
     _.assertEqual (jdb.jstr_keb (testtext), expected)