def _get_jel_text(entr):
    """Return the JEL text of entry object 'entr' minus its first line.

    The first line (everything up to and including the first newline
    character) is dropped because it contains information that varies
    between entries read from a database and entries created by parsing
    input text.
    """
    _header, _sep, body = fmtjel.entr(entr).partition('\n')
    return body
def main(args, opts):
    """Find the entries selected by the command line and print them.

    'args' and 'opts' are handed to opts2sql() to build the query.
    'opts' also supplies .database, .encoding, .debug and .jel.
    Exits with status 1 if the database connection fails.
    """
    # Opening the database gives us the cursor used for every later
    # database operation.  As a side effect it also creates a global
    # variable 'KW' in module 'jdb' holding the data from all the keyword
    # tables (tables named "kw*"), so that data is read once at startup
    # rather than through repeated expensive trips to the database.
    try:
        cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    except jdb.dbapi.OperationalError as e:
        print("Error, unable to connect to database, do you need -u or -p?\n",
              str(e), file=sys.stderr)
        sys.exit(1)

    out_encoding = opts.encoding or sys.stdout.encoding or 'utf-8'
    jdb.reset_encoding(sys.stdout, encoding=out_encoding)

    # Turn the command line options into a sql statement that selects
    # the wanted entries.
    sql, sqlargs = opts2sql(args, opts)
    if opts.debug:
        print("%s %s" % (sql, repr(sqlargs)))

    # 'entrs' is a list of entry objects; 'raw' is a dictionary keyed by
    # table name whose values are all the rows retrieved from that table.
    entrs, raw = jdb.entrList(cur, sql, sqlargs, ret_tuple=True)

    # Xrefs in the retrieved entries carry only the id numbers of the
    # entries they refer to.  augment_xrefs() fetches the extra data
    # (kanji, glosses, etc) needed to display them; the same is done for
    # the reverse references.
    forward_xrefs = raw['xref']
    jdb.augment_xrefs(cur, forward_xrefs)
    jdb.augment_xrefs(cur, raw['xrer'], rev=1)
    jdb.add_xsens_lists(forward_xrefs)
    jdb.mark_seq_xrefs(cur, forward_xrefs)

    # Print each entry in the requested format, with a blank line
    # between consecutive entries.
    for position, entry in enumerate(entrs):
        formatter = fmtjel.entr if opts.jel else fmt.entr
        text = formatter(entry)
        if position > 0:
            print()
        print(text)
    if not entrs:
        print("No entries found")
def roundtrip(cur, intxt):
    """Parse the JEL text 'intxt' and format the result back into JEL.

    The parsed entry gets src=1, has its xrefs resolved using database
    cursor 'cur', and is returned as JEL text without a header line.
    """
    jellex.lexreset(Lexer, intxt)
    entry = Parser.parse(intxt, lexer=Lexer)
    entry.src = 1
    jelparse.resolv_xrefs(cur, entry)

    def sense_xrefs():
        # One xref list per sense; a sense with no '_xref' attribute
        # contributes an empty list.
        return (getattr(sens, '_xref', []) for sens in entry._sens)

    # Each step is run over every sense before the next step starts.
    for xrefs in sense_xrefs():
        jdb.augment_xrefs(cur, xrefs)
    for xrefs in sense_xrefs():
        jdb.add_xsens_lists(xrefs)
    for xrefs in sense_xrefs():
        jdb.mark_seq_xrefs(cur, xrefs)

    return fmtjel.entr(entry, nohdr=True)
def _get_text_from_database(seq, src):
    """Return the JEL text of the entry with sequence number 'seq' in
    corpus 'src', read from the 'jmdict' database.

    The first line of the formatted text (everything up to and including
    the first newline) is removed before returning.  If no entry matches,
    a message is printed and None is returned.
    """
    cur = jdb.dbOpen('jmdict')
    sql = "SELECT id FROM entr WHERE seq=%s AND src=%s"
    elist = jdb.entrList(cur, sql, [seq, src])
    if not elist:
        print("Entry %s not found" % seq)
        return None
    # Only the first matching entry is used.
    entr = elist[0]
    for s in entr._sens:
        # Senses without an '_xref' attribute are treated as having no xrefs.
        jdb.augment_xrefs(cur, getattr(s, '_xref', []))
    return fmtjel.entr(entr).partition('\n')[2]
def dotest(_, testid, xmlfn=None, jelfn=None, dir='data/fmtjel', enc='utf_8_sig'):
    """Check that formatting the entry parsed from an XML file gives the
    text stored in the matching JEL file.

    '_' is the test case object used for the assertion.  When 'xmlfn' or
    'jelfn' is None the file name is built from 'dir' and 'testid' with
    a '.xml' or '.jel' extension.  Both files are read with encoding
    'enc'.
    """
    xml_path = os.path.join(dir, testid + '.xml') if xmlfn is None else xmlfn
    jel_path = os.path.join(dir, testid + '.jel') if jelfn is None else jelfn

    expected = readfile(jel_path, enc)
    xml_text = readfile(xml_path, enc)

    # Only the first parsed entry is formatted and compared.
    parsed = Jmparser.parse_entry(xml_text)
    got = fmtjel.entr(parsed[0], nohdr=True)
    _.assertEqual(expected, got,
                  "\nExpected:\n%s\nGot:\n%s" % (expected, got))
def roundtrip(cur, intxt):
    """Parse the JEL text 'intxt' and format the result back into JEL.

    The parsed entry gets src=1, has its xrefs resolved using database
    cursor 'cur', and is returned as JEL text without a header line.
    """
    # Since hg-180523-6b1a12 the kanji, reading and senses sections of JEL
    # text given to jelparse() are separated by '\f' rather than the '\n'
    # used previously.  The test data still uses '\n', so rather than
    # change all of it, secsepfix() replaces the first two '\n's with
    # '\f's to make the text suitable for parsing.
    intxt = secsepfix(intxt)
    jellex.lexreset(Lexer, intxt)
    entry = Parser.parse(intxt, lexer=Lexer)
    entry.src = 1
    jelparse.resolv_xrefs(cur, entry)

    # Each pass is applied to every sense before the next pass starts.
    xref_passes = (
        lambda xrefs: jdb.augment_xrefs(cur, xrefs),
        jdb.add_xsens_lists,
        lambda xrefs: jdb.mark_seq_xrefs(cur, xrefs),
    )
    for run_pass in xref_passes:
        for sens in entry._sens:
            # A sense with no '_xref' attribute is given an empty list.
            run_pass(getattr(sens, '_xref', []))

    return fmtjel.entr(entry, nohdr=True)
def _interactive(cur, lexer, parser):
    """Repeatedly read JEL text, parse it and print the formatted entry.

    Input is obtained from _getinptext(); the loop ends when that returns
    an empty value.  Parse errors are printed and the input is skipped.
    Errors raised while resolving xrefs (ValueError) are printed, and the
    entry is still formatted and printed afterwards.

    'cur' is the database cursor used for xref resolution; 'lexer' and
    'parser' are the lexer and parser objects to use.  The debug setting
    is read from 'opts', which is not a parameter and must be available
    from the enclosing module scope.
    """
    while True:
        instr = _getinptext()
        if not instr:
            break
        jellex.lexreset(lexer, instr)
        try:
            result = parser.parse(instr, lexer=lexer, debug=opts.debug)
        except jelparse.ParseError as e:
            # Include the error location when the exception provides one.
            if not e.loc:
                msg = e.args[0]
            else:
                msg = "%s\n%s" % (e.args[0], e.loc)
            print(msg)
            continue
        try:
            jelparse.resolv_xrefs(cur, result)
        except ValueError as e:
            # The exception must be bound here: names bound by an earlier
            # "except ... as e" clause do not survive past that clause.
            print(e)
        print(fmtjel.entr(result))
def test_002(_):
    """Format an entry with two kanji and three readings that are linked
    by shared Restr objects, and compare the result with the expected
    JEL text."""
    entry = Entr(id=100, src=1, seq=1000010, stat=2, unap=False)
    entry._kanj = [Kanj(txt=text) for text in ('手紙', '切手')]
    entry._rdng = [Rdng(txt=text) for text in ('てがみ', 'あとで', 'きって')]

    # Every (reading index, kanji index) pair shares a single Restr
    # object that is appended to both the reading and the kanji.
    for rdng_idx, kanj_idx in ((0, 1), (1, 0), (2, 0), (2, 1)):
        restr = Restr()
        entry._rdng[rdng_idx]._restr.append(restr)
        entry._kanj[kanj_idx]._restr.append(restr)

    expect = ('jmdict 1000010 A {100}\n'
              '手紙;切手\n'
              'てがみ[手紙];あとで[切手];きって[nokanji]\n')
    jeltxt = fmtjel.entr(entry)
    _.assertEqual(expect, jeltxt, "\nA:\n%s\nB:\n%s" % (expect, jeltxt))
def check(self, seq):
    """Compare the JEL text generated for database entry 'seq' with the
    expected text stored in "data/fmtjel/<seq>.txt".

    The entry is read through the module-level cursor 'Cur'.  The first
    line of the generated text is dropped before the comparison.
    """
    global Cur, KW
    # Read expected text, remove any unicode BOM or trailing whitespace
    # that may have been added when editing.  The file is closed as soon
    # as it has been read.
    with open("data/fmtjel/" + str(seq) + ".txt", encoding='utf-8') as f:
        expected = f.read().rstrip()
    # startswith() is safe on an empty string, unlike indexing [0].
    if expected.startswith('\ufeff'):
        expected = expected[1:]

    # Read the entry from the database.  Be sure to get from the right
    # corpus and get only the currently active entry.  Assert that we
    # received exactly one entry.
    sql = "SELECT id FROM entr WHERE src=1 AND seq=%s AND stat=2 AND NOT unap"
    entrs, data = jdb.entrList(Cur, sql, (seq, ), ret_tuple=True)
    self.assertEqual(1, len(entrs))

    # Add the annotations needed for displaying xrefs in condensed form.
    jdb.augment_xrefs(Cur, data['xref'])
    jdb.augment_xrefs(Cur, data['xrer'], rev=True)
    fmtjel.markup_xrefs(Cur, data['xref'])

    # Test fmtjel by having it convert the entry to JEL, then discard
    # the first line of the result.
    resultlines = fmtjel.entr(entrs[0]).splitlines(True)
    resulttxt = ''.join(resultlines[1:])

    # Confirm that the received text matched the expected text.
    # (assertTrue replaces the assert_ alias removed in Python 3.12.)
    self.assertTrue(10 < len(resulttxt))
    msg = "\nExpected:\n%s\nGot:\n%s" % (expected, resulttxt)
    self.assertEqual(expected, resulttxt, msg)
def main(args, opts):
    """Produce the entry display page for the entries named in the
    request form.

    The form's 'e' and 'q' values select the entries and 'disp' selects
    the text format generated for each one.  The page is rendered from
    the 'entr.jinja' template.  'args' and 'opts' are not used.
    """
    jdb.reset_encoding(sys.stdout, 'utf-8')
    errs = []
    try:
        form, svc, dbg, cur, sid, sess, parms, cfg = jmcgi.parseform()
    except Exception as e:
        # NOTE(review): presumably err_page() does not return; if it did,
        # the names unpacked above would be unbound below -- confirm.
        jmcgi.err_page([str(e)])

    entries = jmcgi.get_entrs(cur, form.getlist('e'), form.getlist('q'), errs)
    if errs:
        jmcgi.err_page(errs)

    # Give each entry a .SEQKR attribute holding the kanji and reading of
    # the newest (most recently edited) entry with the same sequence
    # number.
    seqkr_decorate(entries)

    def sort_key(entry):
        # Entries with the same sequence number (.src, .seq) are grouped
        # together and the groups are ordered by the kanji/reading of
        # their newest entry.  Because an entry's kanji or readings are
        # sometimes edited, this keeps changed entries next to their
        # earlier versions while still ordering overall by kanji/reading.
        # Inside a group, entries go from most to least recently edited,
        # judged by the timestamp of the last history record (a
        # datetime.datetime in ._hist[*].dt), then by descending id.
        last_edit = entry._hist[-1].dt.timestamp() if entry._hist else 0
        return (entry.SEQKR[0], entry.SEQKR[1],
                # .src and .seq: in case different seqs have the same SEQKR.
                entry.src, entry.seq,
                -last_edit, -entry.id)

    entries.sort(key=sort_key)

    for entry in entries:
        for sens in entry._sens:
            if hasattr(sens, '_xref'):
                jdb.augment_xrefs(cur, sens._xref)
            if hasattr(sens, '_xrer'):
                jdb.augment_xrefs(cur, sens._xrer, 1)
        if hasattr(entry, '_snd'):
            jdb.augment_snds(cur, entry._snd)
    cur.close()

    # Map each recognised 'disp' value to the function that generates the
    # entry text; any other value yields empty text for every entry.
    disp = form.getfirst('disp')
    renderers = {
        'xml': fmtxml.entr,
        'jm': lambda entry: fmtxml.entr(entry, compat='jmdict'),
        'jmne': lambda entry: fmtxml.entr(entry, compat='jmnedict'),
        'jel': fmtjel.entr,
        'ed': xslfmt.entr,
    }
    render = renderers.get(disp, lambda entry: '')
    etxts = [render(entry) for entry in entries]

    jmcgi.htmlprep(entries)
    jmcgi.add_encodings(entries)    # For kanjidic entries.
    if disp == 'ed':
        etxts = [jmcgi.txt2html(text) for text in etxts]
    jmcgi.add_filtered_xrefs(entries, rem_unap=True)

    if errs:
        jmcgi.err_page(errs)

    jmcgi.jinja_page('entr.jinja',
                     entries=list(zip(entries, etxts)), disp=disp,
                     svc=svc, dbg=dbg, sid=sid, session=sess, cfg=cfg,
                     parms=parms, this_page='entr.py')
def test_001(_):
    """Format an entry that has no kanji, readings or senses and compare
    the result with the expected JEL text."""
    entry = Entr(id=100, src=1, seq=1000010, stat=2, unap=False)
    _.assertEqual('jmdict 1000010 A {100}\n\n\n', fmtjel.entr(entry))