Exemple #1
0
def _get_jel_text(entr):
    '''Return the JEL text for entry object 'entr' with its first
        line removed.  Everything up to and including the first
        newline character is dropped because that line contains
        information that varies between objects read from a
        database and objects created by parsing input text.'''

    _first_line, _sep, remainder = fmtjel.entr(entr).partition('\n')
    return remainder
Exemple #2
0
def main(args, opts):
    # Open the database and get a "cursor" for all subsequent
    # database operations.  As a side-effect the call also creates
    # a global variable 'KW' in module 'jdb' holding the data read
    # from the keyword tables (tables named "kw*"); reading it once
    # at startup avoids repeated costly database trips later.
    try:
        cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    except jdb.dbapi.OperationalError as exc:
        print("Error, unable to connect to database, do you need -u or -p?\n",
              str(exc),
              file=sys.stderr)
        sys.exit(1)

    # Output encoding: explicit option first, then whatever stdout
    # reports, then utf-8 as the last resort.
    out_encoding = opts.encoding or sys.stdout.encoding or 'utf-8'
    jdb.reset_encoding(sys.stdout, encoding=out_encoding)

    # Turn the command line arguments and options into a sql
    # statement (plus its arguments) selecting the wanted entries.
    sql, sqlargs = opts2sql(args, opts)
    if opts.debug:
        print("%s  %s" % (sql, repr(sqlargs)))

    # Fetch the entries.  'entrs' is a list of entry objects; 'raw'
    # is a dictionary keyed by table name whose values are all the
    # rows retrieved from that table.
    entrs, raw = jdb.entrList(cur, sql, sqlargs, ret_tuple=True)

    # The xrefs on the retrieved entries hold only the id numbers of
    # the entries they reference.  To be able to show the referenced
    # entry's kanji, glosses, etc. the extra information is fetched
    # with "augment_xrefs"; the same is done for reverse references.
    jdb.augment_xrefs(cur, raw['xref'])
    jdb.augment_xrefs(cur, raw['xrer'], rev=1)
    jdb.add_xsens_lists(raw['xref'])
    jdb.mark_seq_xrefs(cur, raw['xref'])

    # Print each entry in the requested format, separating
    # consecutive entries with one blank line.
    for position, entry in enumerate(entrs):
        txt = fmtjel.entr(entry) if opts.jel else fmt.entr(entry)
        if position > 0:
            print()
        print(txt)

    if not entrs:
        print("No entries found")
Exemple #3
0
def roundtrip(cur, intxt):
    # Parse the JEL text 'intxt' into an entry object, resolve and
    # annotate its xrefs using database cursor 'cur', then format
    # the entry back to JEL text (without header) and return it.
    def xrefs_of(sens):
        # A sense without an '_xref' attribute contributes no xrefs.
        return getattr(sens, '_xref', [])

    jellex.lexreset(Lexer, intxt)
    entr = Parser.parse(intxt, lexer=Lexer)
    entr.src = 1
    jelparse.resolv_xrefs(cur, entr)

    # Each annotation step is run over every sense before the next
    # step starts; the passes are deliberately kept separate.
    for sens in entr._sens:
        jdb.augment_xrefs(cur, xrefs_of(sens))
    for sens in entr._sens:
        jdb.add_xsens_lists(xrefs_of(sens))
    for sens in entr._sens:
        jdb.mark_seq_xrefs(cur, xrefs_of(sens))

    return fmtjel.entr(entr, nohdr=True)
Exemple #4
0
def _get_text_from_database(seq, src):
    '''Read an entry from the 'jmdict' database and return its
        JEL text with the first line removed.

        'seq' and 'src' are the sequence number and corpus id used
        to select the entry.  If several entries match, the first
        one returned by jdb.entrList() is used.  If none match, a
        message is printed and None is returned.'''

    cur = jdb.dbOpen('jmdict')
    sql = "SELECT id FROM entr WHERE seq=%s AND src=%s"
    elist = jdb.entrList(cur, sql, [seq, src])
    if not elist:
        print("Entry %s not found" % seq)
        return None
    entr = elist[0]
    # Add to each sense's xrefs the extra information needed to
    # format them; senses without an '_xref' attribute are skipped.
    for s in entr._sens:
        jdb.augment_xrefs(cur, getattr(s, '_xref', []))
    # Drop the first line of the formatted text (everything up to
    # and including the first newline).
    return fmtjel.entr(entr).partition('\n')[2]
Exemple #5
0
def dotest(_,
           testid,
           xmlfn=None,
           jelfn=None,
           dir='data/fmtjel',
           enc='utf_8_sig'):
    # Parse the XML file for test 'testid', format the first parsed
    # entry as JEL (without header) and assert that it equals the
    # contents of the matching .jel file.  When 'xmlfn' or 'jelfn'
    # is None the file name is built from 'dir' and 'testid'.  Both
    # files are read with encoding 'enc'.  '_' is the test case
    # object whose assertEqual() is called.
    xml_path = os.path.join(dir, testid + '.xml') if xmlfn is None else xmlfn
    jel_path = os.path.join(dir, testid + '.jel') if jelfn is None else jelfn
    expected = readfile(jel_path, enc)
    entries = Jmparser.parse_entry(readfile(xml_path, enc))
    got = fmtjel.entr(entries[0], nohdr=True)
    _.assertEqual(expected, got,
                  "\nExpected:\n%s\nGot:\n%s" % (expected, got))
Exemple #6
0
def roundtrip(cur, intxt):
    # Parse the JEL text 'intxt' into an entry object, resolve and
    # annotate its xrefs using database cursor 'cur', then format
    # the entry back to JEL text (without header) and return it.
    #
    # Since hg-180523-6b1a12 the kanji, reading and senses sections
    # of JEL text given to jelparse() are separated by '\f' rather
    # than the '\n' used previously.  The test data still uses '\n',
    # so instead of changing all of it, secsepfix() is called to
    # replace the first two '\n's with '\f's, making the text
    # suitable for parsing.
    def xrefs_of(sens):
        # A sense without an '_xref' attribute contributes no xrefs.
        return getattr(sens, '_xref', [])

    intxt = secsepfix(intxt)
    jellex.lexreset(Lexer, intxt)
    entr = Parser.parse(intxt, lexer=Lexer)
    entr.src = 1
    jelparse.resolv_xrefs(cur, entr)

    # Each annotation step is run over every sense before the next
    # step starts; the passes are deliberately kept separate.
    for sens in entr._sens:
        jdb.augment_xrefs(cur, xrefs_of(sens))
    for sens in entr._sens:
        jdb.add_xsens_lists(xrefs_of(sens))
    for sens in entr._sens:
        jdb.mark_seq_xrefs(cur, xrefs_of(sens))

    return fmtjel.entr(entr, nohdr=True)
Exemple #7
0
def _interactive(cur, lexer, parser):
    # Read-parse-print loop: repeatedly get input text, parse it
    # with 'parser' and 'lexer', resolve the xrefs of the result
    # using database cursor 'cur', and print the result formatted
    # as JEL.  The loop ends when _getinptext() returns an empty
    # (false) value.  'opts' is not a parameter; it is read from
    # the enclosing (module) scope for its 'debug' setting.
    while True:
        instr = _getinptext()
        if not instr: break
        jellex.lexreset(lexer, instr)
        try:
            result = parser.parse(instr, lexer=lexer, debug=opts.debug)
        except jelparse.ParseError as e:
            # Report the error, with its location when one is
            # available, and go on to the next input.
            if not e.loc: msg = e.args[0]
            else: msg = "%s\n%s" % (e.args[0], e.loc)
            print(msg)
            continue
        try:
            jelparse.resolv_xrefs(cur, result)
        except ValueError as e:
            # The exception must be bound here: the 'e' of the
            # ParseError handler above no longer exists once that
            # handler has finished.  The entry is still printed
            # below after the error has been reported.
            print(e)
        print(fmtjel.entr(result))
Exemple #8
0
 def test_002(_):
     # Entry with two kanji and three readings.  Each Restr object
     # is appended to the '_restr' list of one reading and of one
     # kanji; the (reading index, kanji index) pairs are listed
     # below in the order the objects are created.
     e1 = Entr(id=100, src=1, seq=1000010, stat=2, unap=False)
     e1._kanj = [Kanj(txt='手紙'), Kanj(txt='切手')]
     e1._rdng = [Rdng(txt='てがみ'), Rdng(txt='あとで'), Rdng(txt='きって')]
     for rdng_idx, kanj_idx in ((0, 1), (1, 0), (2, 0), (2, 1)):
         restr = Restr()
         e1._rdng[rdng_idx]._restr.append(restr)
         e1._kanj[kanj_idx]._restr.append(restr)
     expect = ('jmdict 1000010 A {100}\n'
               '手紙;切手\n'
               'てがみ[手紙];あとで[切手];きって[nokanji]\n')
     jeltxt = fmtjel.entr(e1)
     _.assertEqual(expect, jeltxt,
                   "\nA:\n%s\nB:\n%s" % (expect, jeltxt))
Exemple #9
0
 def check(self, seq):
     # Compare the JEL text that fmtjel generates for the entry with
     # sequence number 'seq' against the expected text stored in
     # "data/fmtjel/<seq>.txt".  Uses the module-level database
     # cursor 'Cur'.

     # Read expected text, remove any unicode BOM or trailing whitespace
     # that may have been added when editing.  The 'with' block makes
     # sure the file is closed, and startswith() copes with an empty
     # file where indexing the first character would raise IndexError.
     with open("data/fmtjel/" + str(seq) + ".txt",
               encoding='utf-8') as f:
         expected = f.read().rstrip()
     if expected.startswith('\ufeff'): expected = expected[1:]
     # Read the entry from the database.  Be sure to get from the right
     # corpus and get only the currently active entry.  Assert that we
     # received exactly one entry.
     sql = "SELECT id FROM entr WHERE src=1 AND seq=%s AND stat=2 AND NOT unap"
     entrs, data = jdb.entrList(Cur, sql, (seq, ), ret_tuple=True)
     self.assertEqual(1, len(entrs))
     # Add the annotations needed for displaying xrefs in condensed form.
     jdb.augment_xrefs(Cur, data['xref'])
     jdb.augment_xrefs(Cur, data['xrer'], rev=True)
     fmtjel.markup_xrefs(Cur, data['xref'])
     # Test fmtjel by having it convert the entry to JEL, then drop
     # the first line of the generated text before comparing.
     lines = fmtjel.entr(entrs[0]).splitlines(True)
     resulttxt = ''.join(lines[1:])
     # Sanity check that a non-trivial amount of text was produced.
     # assertTrue() is used because the assert_() alias is deprecated
     # and no longer exists in Python 3.12 and later.
     self.assertTrue(10 < len(resulttxt))
     # Confirm that the received text matches the expected text.
     msg = "\nExpected:\n%s\nGot:\n%s" % (expected, resulttxt)
     self.assertEqual(expected, resulttxt, msg)
Exemple #10
0
def main (args, opts):
          # Fetch the entries named by the form's 'e' and 'q'
          # parameters, sort and annotate them, format each one as
          # text according to the 'disp' parameter and render the
          # 'entr.jinja' page.
        jdb.reset_encoding (sys.stdout, 'utf-8')
        errs = []
        try:
            form, svc, dbg, cur, sid, sess, parms, cfg = jmcgi.parseform()
        except Exception as exc:
            jmcgi.err_page ([str (exc)])
        entries = jmcgi.get_entrs (cur, form.getlist ('e'),
                                        form.getlist ('q'), errs)
        if errs: jmcgi.err_page (errs)

          # Add a .SEQKR attribute to each entry in 'entries' that
          # gives the kanji and reading of the newest (most recently
          # edited) entry that has the same sequence number.
        seqkr_decorate (entries)

          # Sort the entries.  The sorting order will group entries
          # with the same sequence number (.src,.seq) together and
          # each of those groups will be ordered by the kanji/reading
          # of the newest (most recently edited) entry in the group.
          # (The kanji and/or readings of an entry are sometimes changed
          # and this order will keep the changed entries together with
          # their pre-changed versions, while maintaining an overall
          # ordering by kanji/reading.)  Within each group having the
          # same sequence number, entries are sorted in descending order
          # by the timestamp of the most recent history; that is, from
          # the most recently edited entry to the least recently edited
          # one.
        def sort_key (entr):
            return (
                entr.SEQKR[0], entr.SEQKR[1],
                  # In case different seqs have same SEQKR.
                entr.src, entr.seq,
                  # entr._hist[*].dt is a datetime.datetime instance.
                -(entr._hist[-1].dt.timestamp() if entr._hist else 0),
                -entr.id)
        entries.sort (key=sort_key)

          # Augment the xrefs, reverse xrefs and sounds present on
          # the entries while the cursor is still open.
        for entr in entries:
            for sens in entr._sens:
                if hasattr (sens, '_xref'):
                    jdb.augment_xrefs (cur, sens._xref)
                if hasattr (sens, '_xrer'):
                    jdb.augment_xrefs (cur, sens._xrer, 1)
            if hasattr (entr, '_snd'):
                jdb.augment_snds (cur, entr._snd)
        cur.close()

          # Choose the text formatter from the 'disp' form parameter.
          # The formatters are wrapped in lambdas so that a formatting
          # module is only touched when its format is requested.  An
          # unrecognised or missing 'disp' gives an empty text for
          # every entry.
        disp = form.getfirst ('disp')
        formatters = {
            'xml':  lambda x: fmtxml.entr (x),
            'jm':   lambda x: fmtxml.entr (x, compat='jmdict'),
            'jmne': lambda x: fmtxml.entr (x, compat='jmnedict'),
            'jel':  lambda x: fmtjel.entr (x),
            'ed':   lambda x: xslfmt.entr (x),
            }
        render = formatters.get (disp, lambda x: '')
        etxts = [render (x) for x in entries]

        jmcgi.htmlprep (entries)
        jmcgi.add_encodings (entries)    # For kanjidic entries.
        if disp == 'ed':
            etxts = [jmcgi.txt2html (x) for x in etxts]
        jmcgi.add_filtered_xrefs (entries, rem_unap=True)

        if errs: jmcgi.err_page (errs)

        jmcgi.jinja_page ('entr.jinja',
                        entries=list(zip(entries, etxts)), disp=disp,
                        svc=svc, dbg=dbg, sid=sid, session=sess, cfg=cfg,
                        parms=parms, this_page='entr.py')
Exemple #11
0
 def test_001(_):
     # An entry with no kanji, readings or senses is expected to
     # format as the header line followed by two empty lines.
     entry = Entr(id=100, src=1, seq=1000010, stat=2, unap=False)
     _.assertEqual('jmdict 1000010 A {100}\n\n\n', fmtjel.entr(entry))