def main(args, opts):
    """Look up entries selected by the command line and print them.

    args -- Positional command line arguments; passed to opts2sql().
    opts -- Parsed command line options.  The attributes read here
        are: database, encoding, debug and jel (the whole object is
        also handed to jdb.dbopts() and opts2sql()).

    Exits with status 1 if the database connection fails.
    """
    # Opening the database yields the cursor used for every later
    # database operation.  As a side effect it also creates a global
    # variable 'KW' in module 'jdb' holding data read from all the
    # keyword tables (tables whose names match the pattern "kw*").
    # That data is read once at startup to avoid repeated, expensive
    # trips to the database later.
    try:
        cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    except jdb.dbapi.OperationalError as err:
        print("Error, unable to connect to database, do you need -u or -p?\n",
              str(err), file=sys.stderr)
        sys.exit(1)

    # Output encoding: explicit option first, then whatever stdout
    # reports, then utf-8 as the last resort.
    enc = opts.encoding or sys.stdout.encoding or 'utf-8'
    jdb.reset_encoding(sys.stdout, encoding=enc)

    # Turn the command line options into a sql statement that will
    # find the desired entries.
    sql, sqlargs = opts2sql(args, opts)
    if opts.debug:
        print(f"{sql} {sqlargs!r}")

    # Retrieve the entries.  'entrs' is a list of entry objects;
    # 'raw' is a dictionary keyed by table name whose values are all
    # the rows retrieved from that table.
    entrs, raw = jdb.entrList(cur, sql, sqlargs, ret_tuple=True)

    # Xrefs in the retrieved entries contain only the entry id numbers
    # of the referenced entries.  To show the referenced entry's kanji,
    # glosses, etc. we call augment_xrefs() to fetch that extra
    # information; the same is done for the reverse references.
    forward_xrefs = raw['xref']
    jdb.augment_xrefs(cur, forward_xrefs)
    jdb.augment_xrefs(cur, raw['xrer'], rev=1)
    jdb.add_xsens_lists(forward_xrefs)
    jdb.mark_seq_xrefs(cur, forward_xrefs)

    # Print each entry in the format the user asked for, with a blank
    # line between consecutive entries.
    for position, entry in enumerate(entrs):
        txt = fmtjel.entr(entry) if opts.jel else fmt.entr(entry)
        if position:
            print()
        print(txt)

    if not entrs:
        print("No entries found")
def roundtrip(cur, intxt):
    """Parse 'intxt' into an entry and format it back to text.

    cur -- Database cursor used to resolve and annotate xrefs.
    intxt -- Text to parse with the module's Lexer and Parser.

    Returns the text produced by fmtjel.entr() (without header) for
    the parsed entry.
    """
    jellex.lexreset(Lexer, intxt)
    parsed = Parser.parse(intxt, lexer=Lexer)
    parsed.src = 1
    jelparse.resolv_xrefs(cur, parsed)

    def xrefs_of(sens):
        # Senses without an '_xref' attribute contribute an empty list.
        return getattr(sens, '_xref', [])

    # Each annotation step is run over every sense before the next
    # step starts, so the three passes are kept separate.
    for sens in parsed._sens:
        jdb.augment_xrefs(cur, xrefs_of(sens))
    for sens in parsed._sens:
        jdb.add_xsens_lists(xrefs_of(sens))
    for sens in parsed._sens:
        jdb.mark_seq_xrefs(cur, xrefs_of(sens))

    return fmtjel.entr(parsed, nohdr=True)
def roundtrip(cur, intxt):
    """Parse JEL test text into an entry and format it back to text.

    cur -- Database cursor used to resolve and annotate xrefs.
    intxt -- JEL text whose sections are separated by '\\n'.

    Returns the text produced by fmtjel.entr() (without header) for
    the parsed entry.
    """
    # Since hg-180523-6b1a12 the kanji, reading and senses sections of
    # JEL text given to jelparse() are separated by '\f' rather than
    # the '\n' used previously.  Rather than change all the test data
    # that still uses '\n', secsepfix() is called to replace the first
    # two '\n's with '\f's, making the text suitable for parsing.
    fixed = secsepfix(intxt)

    jellex.lexreset(Lexer, fixed)
    parsed = Parser.parse(fixed, lexer=Lexer)
    parsed.src = 1
    jelparse.resolv_xrefs(cur, parsed)

    def xrefs_of(sens):
        # Senses without an '_xref' attribute contribute an empty list.
        return getattr(sens, '_xref', [])

    # Each annotation step is run over every sense before the next
    # step starts, so the three passes are kept separate.
    for sens in parsed._sens:
        jdb.augment_xrefs(cur, xrefs_of(sens))
    for sens in parsed._sens:
        jdb.add_xsens_lists(xrefs_of(sens))
    for sens in parsed._sens:
        jdb.mark_seq_xrefs(cur, xrefs_of(sens))

    return fmtjel.entr(parsed, nohdr=True)
def markup_xrefs(cur, xrefs):
    """Annotate 'xrefs' in place for display.

    cur -- Database cursor, needed by jdb.mark_seq_xrefs().
    xrefs -- The xref objects to annotate.

    Runs jdb.add_xsens_lists() and then jdb.mark_seq_xrefs() on the
    same xrefs; nothing is returned.
    """
    jdb.add_xsens_lists(xrefs)
    jdb.mark_seq_xrefs(cur, xrefs)
def get_entrs(dbh, elist, qlist, errs, active=None, corpus=None):
    """Retrieve Entr objects selected by entry id and/or seq number.

    dbh -- Open dbapi cursor to the current database.
    elist -- List of id numbers of entries to get.  Each number
        may be either an integer or a string.
    qlist -- List of seq numbers of entries to get.  Each seq
        number may be an integer or a string.  If the latter it
        may be followed by a period and a corpus identifier, which
        is either the corpus id number or the corpus name.
    errs -- Must be a list (or other append()able object) to which
        any error messages will be appended.
    active -- If 1, only active/approved or new (unapproved)
        entries are retrieved.
        If 2, at most one entry is returned per seq number: among
        the active entries, the one with the most recent history
        record (entries without history records sort last).
        Any other value, or none, retrieves every entry meeting
        the entry-id, seq, or seq-corpus criteria.
    corpus -- If not None, a corpus id number or name that is
        applied to any seq number given without an explicit corpus.

    Returns the list of entries from jdb.entrList().  Returns []
    if 'corpus' cannot be resolved, and None if 'errs' is non-empty
    once the id/seq parameters have been checked (this includes any
    messages already present in 'errs' on entry).

    If the same entry is specified more than once in 'elist' and/or
    'qlist' it will occur only once in the returned list.  Objects
    in the returned list are in no particular order.
    """
    id_params = []      # values for the "id IN (...)" placeholders
    seq_params = []     # values for the seq / (seq, src) placeholders
    extra_params = []   # values for the trailing status filter
    conditions = []     # OR'ed together to form the WHERE clause

    corpid = None
    if corpus is not None:
        corpid = corp2id(corpus)
        if corpid is None:
            errs.append("Bad corpus parameter: %s" % corpus)
            return []

    for item in (elist or []):
        try:
            id_params.append(str2eid(str(item)))
        except ValueError:
            errs.append("Bad url parameter received: " + esc(item))
    if id_params:
        placeholders = ','.join(['%s'] * len(id_params))
        conditions.append("id IN (" + placeholders + ")")

    for item in (qlist or []):
        try:
            seq_corp = list(str2seq(str(item)))
        except ValueError:
            errs.append("Bad parameter received: " + esc(item))
            continue
        # Fall back to the default corpus when the seq number did not
        # carry one of its own.
        if corpus and not seq_corp[1]:
            seq_corp[1] = corpid
        if seq_corp[1]:
            conditions.append("(seq=%s AND src=%s)")
            seq_params.extend(seq_corp)
        else:
            conditions.append("seq=%s")
            seq_params.append(seq_corp[0])

    if not conditions:
        errs.append("No valid entry or seq numbers given.")
    if errs:
        return None

    # 'active' may arrive as a string; values that are not numeric
    # are left as they are and simply match neither mode below.
    try:
        active = int(active)
    except (ValueError, TypeError):
        pass

    select_modifier = ''
    history_join = ''
    status_filter = ''
    ordering = ''
    if active == 1:
        # Restrict returned rows to active/approved (stat=A and not
        # unap) or new (dfrm is NULL); the result set will not include
        # any stat=D or stat=R results.
        status_filter = " AND stat=%s AND (NOT unap OR dfrm IS NULL)"
        extra_params.append(jdb.KW.STAT['A'].id)
    elif active == 2:
        # Restrict returned rows to active (no stat=D or stat=R
        # results) and the most recent edit as determined by the
        # history records (if any).  In no event will more than one
        # entry per seq number be returned.  Note that this
        # necessarily returns the edit from only one branch when
        # multiple branches exist, which may surprise a user when the
        # returned entry shows no sign of a recent edit known to have
        # been made.
        #
        # Example of generated sql:
        #   SELECT DISTINCT ON (e.seq) e.id FROM entr e
        #     LEFT JOIN hist h ON h.entr=e.id
        #     WHERE e.seq=2626330 and e.stat=2
        #     ORDER BY e.seq,h.dt DESC NULLS LAST;
        status_filter = " AND e.stat=%s"
        extra_params.append(jdb.KW.STAT['A'].id)
        select_modifier = " DISTINCT ON (e.seq)"
        history_join = " LEFT JOIN hist h ON h.entr=e.id"
        # "NULLS LAST" is needed because some entries (e.g., entries
        # imported when JMdictDB is first initialized and never
        # edited) may have no history records, giving 'dt' values of
        # NULL; we want those entries last.
        ordering = " ORDER BY e.seq,h.dt DESC NULLS LAST"

    sql = ''.join([
        "SELECT", select_modifier, " e.id FROM entr e ",
        history_join, " WHERE (", " OR ".join(conditions), ")",
        status_filter, ordering,
    ])
    # Parameter order must follow placeholder order in the statement.
    sql_params = id_params + seq_params + extra_params
    entries, raw = jdb.entrList(dbh, sql, sql_params, ret_tuple=True)

    if entries:
        forward_xrefs = raw['xref']
        jdb.augment_xrefs(dbh, forward_xrefs)
        jdb.augment_xrefs(dbh, raw['xrer'], rev=1)
        jdb.add_xsens_lists(forward_xrefs)
        jdb.mark_seq_xrefs(dbh, forward_xrefs)
    return entries