Beispiel #1
0
def reformat(ktxt, rtxt, stxt, entr):
    # Given edict2-formatted kanji, reading, and sense
    # strings, try to convert them into jmdictdb objects,
    # and then format them back to JEL-formatted strings
    # which are returned as a (kanji, reading, sense) tuple.
    # If any of the three parses fails, all three returned
    # strings are the original input strings prefixed with
    # "!unparsed!\n" rather than JEL-formatted text (the
    # JEL text is only produced after the sense parse, which
    # in turn requires the kanji and reading parses).
    # If 'entr' is given (truthy), copy_tags() is called with
    # its ._rdng and ._kanj lists and the newly parsed
    # readings and kanji before the senses are parsed.

    def report(part, excep):
        # Print a parse-failure message for 'part' ("kanj",
        # "rdng" or "sens").  If the exception text cannot be
        # written to stdout, print a plain placeholder message
        # instead of letting the UnicodeError propagate.
        try:
            print("reformat %s failed: %s" % (part, str(excep)))
        except UnicodeError:
            print("reformat %s failed: (unprintable exception)" % part)

    kanjs = rdngs = None
    fmap = {}

    # Assume the worst and overwrite the following if
    # things work ok...
    jktxt = "!unparsed!\n" + ktxt
    jrtxt = "!unparsed!\n" + rtxt
    jstxt = "!unparsed!\n" + stxt

    try:
        kanjs = edparse.parse_krpart(ktxt, fmap)
    except eParseError as excep:
        report("kanj", excep)

    # kanjs is None if the kanji parse failed, in which
    # case we can't parse readings or senses.
    if kanjs is not None:
        try:
            rdngs = edparse.parse_krpart(rtxt, fmap, kanjs)
        except eParseError as excep:
            report("rdng", excep)

    # rdngs is None if the reading parse failed (or was
    # never attempted), in which case we can't parse senses.
    if rdngs is not None:
        if entr:
            # The wwwjdic submission data does not apparently
            # include tags from the original entry so we copy
            # them here.
            copy_tags(entr._rdng, entr._kanj, rdngs, kanjs)
        e = jdb.Entr(_rdng=rdngs, _kanj=kanjs)
        try:
            edparse.parse_spart(stxt, e, fmap)
            senss = e._sens
            jktxt = fmtjel.kanjs(kanjs)
            jrtxt = fmtjel.rdngs(rdngs, kanjs)
            jstxt = fmtjel.senss(senss, kanjs, rdngs)
        except eParseError as excep:
            report("sens", excep)

    return jktxt, jrtxt, jstxt
Beispiel #2
0
def create_entr(cursor, parsed):
    # Build an Entr object from 'parsed', a dictionary of
    # wwwjdic submission values.  For a 'new' submission a
    # fresh Entr in the 'jmdict' corpus is created; otherwise
    # the entry with sequence number parsed['seqnum'] is
    # fetched via jmcgi.get_entrs().  The headword, kana and
    # english fields are combined into kanji, reading and
    # sense text, run through reformat(), and attached to the
    # entry as .ktxt, .rtxt and .stxt.  The entry is returned.
    # Raises ParseError if an amended entry cannot be fetched.

    if parsed['subtype'] == 'new':
        entr = jdb.Entr()
        entr.src = jdb.KW.SRC['jmdict'].id
    else:  # == 'amend'
        seqnum = parsed['seqnum']
        errs = []
        # FIXME: following assumes seqnum is an entry in jmdict.
        found = jmcgi.get_entrs(
            cursor, None, [seqnum], errs, active=True, corpus='jmdict')
        if errs:
            print('\n'.join(errs))
        if not found:
            raise ParseError("Unable to get entry seq# %s from database" %
                             seqnum)
        entr = found[0]

    # Headwords that jdb.jstr_reb() accepts go with the readings,
    # everything else with the kanji.
    kanj, rdng = [], []
    for headword in parsed.get('headw', []):
        target = rdng if jdb.jstr_reb(headword) else kanj
        target.append(headword)
    rdng.extend(parsed.get('kana', []))

    ktxt = ';'.join(kanj)
    rtxt = ';'.join(rdng)
    stxt = ' / '.join(parsed.get('english', []))
    pos = ','.join(parsed.get('pos', []))
    misc = ','.join(parsed.get('misc', []))
    xref = ','.join(parsed.get('crossref', []))

    #FIXME: Note that including pos, xref. et.al. can break
    # a sense parse that would otherwise be ok.  Maybe if the
    # parse fails, we should try again without this stuff,
    # and if that works, append this stuff as "unparsable"-
    # tagged extra text.
    # However, senses other than the first may have this
    # information embedded in the text and it seems a bit
    # much to try pulling it out...
    prefix = ''
    if pos:
        prefix += '(' + pos + ')'
    if xref:
        prefix += '(See ' + xref + ')'
    if misc:
        prefix += '(' + misc + ')'
    if prefix:
        # Separate the tag prefix from the sense text proper.
        prefix += ' '
    stxt = prefix + stxt

    #FIXME:  What do about 'date', 'entlangnam' fields?
    # I don't think we care about 'sendNotJS'.

    entr.ktxt, entr.rtxt, entr.stxt = reformat(ktxt, rtxt, stxt, entr)
    return entr
Beispiel #3
0
def mkentr(jtxt, etxt):
    # Build an entry object for the "A" line text of an example
    # sentence: 'jtxt' becomes the single reading (when
    # jdb.jstr_reb() accepts it) or the single kanji, and 'etxt'
    # becomes the single English gloss of the single sense.
    # The current value of the global Lnnum is recorded in
    # .srcnote.
    global Lnnum
    entry = jdb.Entr(stat=KW.STAT_A, unap=False)
    entry.srcnote = str(Lnnum)
    if jdb.jstr_reb(jtxt):
        entry._rdng = [jdb.Rdng(txt=jtxt)]
    else:
        entry._kanj = [jdb.Kanj(txt=jtxt)]
    gloss = jdb.Gloss(txt=etxt, ginf=KW.GINF_equ, lang=KW.LANG_eng)
    entry._sens = [jdb.Sens(_gloss=[gloss])]
    return entry
Beispiel #4
0
def mkentr (jtxt, etxt, kwds):
          # Build an entry object for the "A" line text of an
          # example sentence.  'jtxt' becomes the single reading
          # (when jdb.jstr_reb() accepts it) or the single kanji;
          # 'etxt' becomes the single English gloss.  The current
          # value of the global Lnnum is recorded in .srcnote.
        global Lnnum
        entry = jdb.Entr (stat=KW.STAT_A, unap=False)
        entry.srcnote = str (Lnnum)

          # Each 'kwds' item is a sequence whose first element is
          # a kw id number, optionally followed by a note string.
        kw_ids = [item[0] for item in kwds]
        notes = [item[1] for item in kwds if len(item)>1]
          # An empty joined note string is stored as None.
        sens_note = "; ".join (notes) or None

        if jdb.jstr_reb (jtxt):
            entry._rdng = [jdb.Rdng (txt=jtxt)]
        else:
            entry._kanj = [jdb.Kanj (txt=jtxt)]

        gloss = jdb.Gloss (lang=KW.LANG_eng, ginf=KW.GINF_equ, txt=etxt)
        miscs = [jdb.Misc (kw=kw_id) for kw_id in kw_ids]
        entry._sens = [jdb.Sens (notes=sens_note, _gloss=[gloss], _misc=miscs)]
        return entry
Beispiel #5
0
    def do_entr(self,
                elem,
                seq,
                xlit=False,
                xlang=None,
                corp_dict=None,
                grpdefs=None):
        """
        Create an entr object from a parsed ElementTree entry
        element, 'elem'.

        Parameters:
          elem -- The ElementTree element for the entry.
          seq -- Sequence number for the entry.  If false, the
            number is read from the entry's <ent_seq> element.
          xlit, xlang -- Passed through unchanged to do_senss().
          corp_dict -- Mapping indexed by corpus name whose values
            have an .id attribute; consulted only when the entry
            has an <ent_corp> element.
          grpdefs -- Passed through unchanged to do_groups().

        Raises ParseError if no sequence number is available, if
        it is not a positive integer, or if the 'status' attribute
        value is not recognised.

        Note that the entry object returned is different from one
        read from the database in the following respects:
        * The 'entr' record will have no .src (aka corpus) attribute
          if there is no <ent_corp> element in the entry.  In this
          case the .src attribute is expected to be added by the
          caller.  If there is a <ent_corp> element, it will be
          used to find a corpus in 'corp_dict', which in turn
          will provide an id number used in .src.
        * Items in sense's _xref list are unresolved xrefs, not
          resolved xrefs as in a database entr object.
          jdb.resolv_xref() or similar can be used to resolve the
          xrefs.
        * Attributes will be missing if the corresponding xml
          information is not present.  For example, if a particular
          entry has no <ke_ele> elements, the entr object will not
          have a '._kanj' attribute.  In an entr object read from
          the database, it will have a '._kanj' attribute with a
          value of [].
        * The entr object does not have many of the foreign key
          attributes: gloss.gloss, xref.xref, <anything>.entr, etc.
          However, it does have rdng.rdng, kanj.kanj, and sens.sens
          attributes since these are required when adding restr,
          stagr, stagk, and freq objects.
        """
        XKW, KW = self.XKW, self.KW

        entr = jdb.Entr()

        if not seq:
            elemseq = elem.find('ent_seq')
            if elemseq is None:
                raise ParseError("No <ent_seq> element found")
            try:
                seq = int(elemseq.text)
            except (ValueError, TypeError):
                # TypeError covers an empty <ent_seq/> whose .text is
                # None.  Report the <ent_seq> text, not the text of
                # the enclosing entry element.
                raise ParseError("Invalid 'ent_seq' value, '%s'" %
                                 elemseq.text)
        if seq <= 0:
            # 'seq' may have been supplied by the caller, so report
            # the value itself.
            raise ParseError("Invalid 'ent_seq' value, '%s'" % seq)
        entr.seq = seq

        id_attr = elem.get('id')
        if id_attr is not None:
            entr.id = int(id_attr)
        dfrm = elem.get('dfrm')
        if dfrm is not None:
            entr.dfrm = int(dfrm)
        # With no 'status' attribute, fall back to the id of the 'A'
        # status keyword.
        stat = elem.get('status') or jdb.KW.STAT['A'].id
        try:
            stat = XKW.STAT[stat].id
        except KeyError:
            raise ParseError("Invalid <status> element value, '%s'" % stat)
        entr.stat = stat
        entr.unap = elem.get('appr') == 'n'

        corpname = elem.findtext('ent_corp')
        if corpname is not None:
            entr.src = corp_dict[corpname].id
        # 'fmap' is filled in by do_kanjs() and do_rdngs() and then
        # handed to jdb.make_freq_objs().
        fmap = defaultdict(lambda: ([], []))
        self.do_kanjs(elem.findall('k_ele'), entr, fmap)
        self.do_rdngs(elem.findall('r_ele'), entr, fmap)
        if fmap:
            freq_errs = jdb.make_freq_objs(fmap, entr)
            for typ, r, k, kw, val in freq_errs:
                kwstr = XKW.FREQ[kw].kw + str(val)
                self.freq_warn(typ, r, k, kwstr)
        self.do_senss(elem.findall('sense'), entr, xlit, xlang)
        self.do_senss(elem.findall('trans'), entr, xlit, xlang)
        self.do_info(elem.findall("info"), entr)
        self.do_audio(elem.findall("audio"), entr, jdb.Entrsnd)
        self.do_groups(elem.findall("group"), entr, grpdefs)
        return entr
Beispiel #6
0
def p_preentr_3(p):
    '''preentr : kanjsect NL NL senses'''
    # NOTE(review): the docstring above is presumably the grammar
    # production read by a yacc-style parser generator -- do not
    # reword it.  Entry with a kanji section and senses only.
    kanjs, senss = p[1], p[4]
    p[0] = jdb.Entr(_kanj=kanjs, _sens=senss)
Beispiel #7
0
def p_preentr_2(p):
    '''preentr : NL rdngsect NL senses'''
    # NOTE(review): the docstring above is presumably the grammar
    # production read by a yacc-style parser generator -- do not
    # reword it.  Entry with a reading section and senses only.
    rdngs, senss = p[2], p[4]
    p[0] = jdb.Entr(_rdng=rdngs, _sens=senss)
Beispiel #8
0
def p_preentr_1(p):
    '''preentr : kanjsect NL rdngsect NL senses'''
    # NOTE(review): the docstring above is presumably the grammar
    # production read by a yacc-style parser generator -- do not
    # reword it.  Entry with kanji, reading and sense sections.
    kanjs, rdngs, senss = p[1], p[3], p[5]
    p[0] = jdb.Entr(_kanj=kanjs, _rdng=rdngs, _sens=senss)
Beispiel #9
0
def main(args, opts):
    # Collect the entries to be edited from the request's form
    # parameters and render them with the 'edform.jinja' template.
    # Entries may be supplied as serialized entries ("entr"),
    # as text parsed by edparse.entr() ("j"), or looked up with
    # jmcgi.get_entrs() from the "e" and "q" parameters.  If no
    # entry parameters are given, a blank new-entry form is shown.
    # Any errors collected along the way are reported through
    # jmcgi.err_page().
    jdb.reset_encoding(sys.stdout, 'utf-8')
    errs = []
    entrs = []
    try:
        form, svc, dbg, cur, sid, sess, parms, cfg = jmcgi.parseform()
    except Exception as e:
        # NOTE(review): presumably err_page() does not return;
        # otherwise 'form' is unbound below -- confirm.
        jmcgi.err_page([str(e)])

    fv = form.getfirst
    fl = form.getlist
    is_editor = jmcgi.is_editor(sess)
    dbg = fv('dbg')
    meth = fv('meth')
    def_corp = fv('c')  # Default corpus for new entries.
    defcorpid = None
    if def_corp:
        # The corpus may be given either as an id number or by
        # name; try the numeric form first.
        try:
            def_corp = int(def_corp)
        except ValueError:
            pass
        try:
            defcorpid = jdb.KW.SRC[def_corp].id
        except KeyError:
            errs.append("Bad url parameter: c=%s" % def_corp)
    force_corp = fv('f')  # Force default corpus for new entries.

    sentrs = fl("entr")
    for sentr in sentrs:
        try:
            # Keep the result in its own name; assigning it to
            # 'entrs' would discard the entries collected so far.
            unserialized = serialize.unserialize(sentr)
        except Exception as e:
            errs.append("Bad 'entr' value, unable to unserialize: %s" % str(e))
        else:
            # NOTE(review): the shape of unserialize()'s result is
            # not visible here; accept either a single entry or a
            # list of entries -- confirm against serialize.
            if isinstance(unserialized, (list, tuple)):
                entrs.extend(unserialized)
            else:
                entrs.append(unserialized)

    jentrs = fl('j')
    for jentr in jentrs:
        try:
            entr = edparse.entr(jentr)
        except Exception as e:
            errs.append("Bad 'j' value, unable to parse: %s" % str(e))
        else:
            entr.src = None
            entrs.append(entr)

    elist, qlist, active = fl('e'), fl('q'), fv('a')
    if elist or qlist:
        entrs.extend(
            jmcgi.get_entrs(cur,
                            elist or [],
                            qlist or [],
                            errs,
                            active=active,
                            corpus=def_corp) or [])
    cur.close()

    if (elist or qlist or jentrs or sentrs) and not entrs:
        # The caller explicitly specified an entry to edit but we
        # didn't find it (or them).  Rather than treating this as
        # though no entries were given and displaying a blank edit
        # form, show an error message.
        errs.append("No matching entries were found")
    if errs: jmcgi.err_page(errs)

    srcs = sorted(jdb.KW.recs('SRC'), key=lambda x: x.kw.lower())
    #srcs.insert (0, jdb.Obj (id=0, kw='', descr=''))
    if not entrs:
        # This is a blank new entry.
        # The following dummy entry will produce the default
        # text for new entries: no kanji, no reading, and sense
        # text "[1][n]".
        entr = jdb.Entr(
            _sens=[jdb.Sens(_pos=[jdb.Pos(kw=jdb.KW.POS['n'].id)])], src=None)
        entrs = [entr]
    for e in entrs:
        if not is_editor: remove_freqs(e)
        e.ISDELETE = (e.stat == jdb.KW.STAT['D'].id) or None
        # Provide a default corpus.
        if not e.src: e.src = defcorpid
        e.NOCORPOPT = force_corp

    # Nothing in this function appends to 'errs' after the check
    # above; the repeated checks are kept from the original.
    if errs: jmcgi.err_page(errs)

    for e in entrs:
        e.ktxt = fmtjel.kanjs(e._kanj)
        e.rtxt = fmtjel.rdngs(e._rdng, e._kanj)
        e.stxt = fmtjel.senss(e._sens, e._kanj, e._rdng)

    if errs: jmcgi.err_page(errs)

    jmcgi.jinja_page('edform.jinja',
                     parms=parms,
                     extra={},
                     entrs=entrs,
                     srcs=srcs,
                     is_editor=is_editor,
                     svc=svc,
                     dbg=dbg,
                     sid=sid,
                     session=sess,
                     cfg=cfg,
                     this_page='edform.py')
Beispiel #10
0
def p_preentr_2(p):
    '''preentr : FF rdngsect FF senses'''
    # NOTE(review): the docstring above is presumably the grammar
    # production read by a yacc-style parser generator -- do not
    # reword it.  Entry with a reading section and senses only.
    rdngs, senss = p[2], p[4]
    p[0] = jdb.Entr(_rdng=rdngs, _sens=senss)
Beispiel #11
0
def do_chr(elem, srcid, langs):
    global Char
    # Process a <character> element.  The element has been
    # parsed by the xml ElementTree parser and is in "elem".
    # "srcid" is stored as the new entry's .src and "langs" is
    # passed through to reading_meaning().  Returns an Entr
    # object whose .chr attribute is a Chr object holding the
    # character-specific data.

    chtxt = elem.find('literal').text
    Char = chtxt  # For warning messages created by warn().
    c = jdb.Chr(chr=chtxt, _cinf=[])
    e = jdb.Entr(src=srcid,
                 stat=KW.STAT_A,
                 seq=jdb.uord(chtxt),
                 unap=False,
                 chr=c,
                 _kanj=[jdb.Kanj(txt=chtxt)],
                 _rdng=[],
                 _sens=[],
                 _krslv=[])
    for x in elem.findall('codepoint/cp_value'):
        codepoint(x, c, chtxt)
    for x in elem.findall('radical/rad_value'):
        radical(x, c)

    # findtext() returns None when the element is absent, so no
    # exception handling is needed for the optional elements.
    x = elem.findtext('misc/freq')
    if x:
        if c.freq is not None:
            warn('Duplicate "freq" element ignored: %s' % x)
        else:
            c.freq = int(x)

    x = elem.findtext('misc/grade')
    if x:
        if c.grade is not None:
            warn('Duplicate "grade" element ignored: %s' % x)
        else:
            c.grade = int(x)

    for n, x in enumerate(elem.findall('misc/stroke_count')):
        strokes(x, n, c)

    # Multiple radical names are joined with an ideographic comma.
    rn = '\u3001'.join([x.text for x in elem.findall('misc/rad_name')])
    if rn:
        c.radname = rn

    for x in elem.findall('reading_meaning'):
        reading_meaning(x, e._rdng, e._sens, c._cinf, langs)

    x = elem.find('dic_number')
    if x is not None:
        dicnum(x, c._cinf)

    x = elem.find('query_code')
    if x is not None:
        qcode(x, c._cinf)

    for x in elem.findall('misc/variant'):
        e._krslv.append(variant(x))

    x = elem.find('misc/jlpt')
    if x is not None:
        jlptnum(x, c)

    return e