def find_entries(cur, stat, sopts='', interval='30 days'):
    """Find and load entries whose newest history record is older than 'interval'.

    cur -- An open psycopg2 cursor to a JMdictDB database.
    stat -- A sequence of kwstat.id numbers.  Only entries with one
        of these values in their 'stat' column are processed
        (4 is "deleted", 6 is "rejected").
    sopts -- A string, described in the help for --corpus, giving
        the corpora to be processed.
    interval -- A string giving a Postgresql interval spec.

    Returns a 2-tuple: the entry list produced by jdb.entrList() and
    the value returned by jdb.entrFind() that was used to load it.
    """
    corpus_clause = parse_corpus_opt(sopts, 'e.src')
    # The doubled %%s survive the Python formatting below as %s
    # placeholders that are filled in later by the database driver.
    template = (
        "SELECT e.id "
        "FROM entr e "
        "JOIN hist h ON h.entr=e.id "
        "WHERE e.stat IN %%s AND NOT e.unap %s "
        "AND NOT EXISTS (SELECT 1 FROM entr WHERE dfrm=e.id) "
        "GROUP BY e.id "
        "HAVING MAX(dt)<(CURRENT_TIMESTAMP AT TIME ZONE 'utc'-%%s::INTERVAL) "
        "ORDER BY id"
    )
    sql = template % corpus_clause
    # 'stat' must be a tuple when used as the argument of an IN clause
    # in psycopg2; a list would be converted to a Postgresql array,
    # which won't work there.
    sql_args = (tuple(stat), interval)
    found = jdb.entrFind(cur, sql, sql_args)
    entries, tables = jdb.entrList(cur, found, None, ord="src,seq,id",
                                   ret_tuple=True)
    jdb.augment_xrefs(cur, tables['xref'])
    return entries, found
def main():
    """Load entry id 542 from the 'jmnew' database and print its text."""
    cursor = jdb.dbOpen('jmnew')
    entries, tables = jdb.entrList(cursor, [542], ret_tuple=True)
    xrefs = tables['xref']
    # Augment the same xref rows in the forward and the reverse
    # direction, then mark them up, before formatting.
    jdb.augment_xrefs(cursor, xrefs)
    jdb.augment_xrefs(cursor, xrefs, rev=1)
    markup_xrefs(cursor, xrefs)
    for item in entries:
        print(entr(item))
def main(args, opts):
    """Print the entries selected by the command line arguments.

    args, opts -- Parsed command line arguments and options.  The
        options read here are 'database', 'encoding', 'debug' and
        'jel'; both are also handed to opts2sql() and jdb.dbopts().
    """
    # Opening the database yields the cursor used for all later
    # database operations.  As a side effect it also creates the
    # global 'KW' in module 'jdb', holding the data read from all the
    # keyword tables ("kw*"), so that it is read only once at startup.
    try:
        cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    except jdb.dbapi.OperationalError as e:
        print("Error, unable to connect to database, do you need -u or -p?\n",
              str(e), file=sys.stderr)
        sys.exit(1)

    out_encoding = opts.encoding or sys.stdout.encoding or 'utf-8'
    jdb.reset_encoding(sys.stdout, encoding=out_encoding)

    # Turn the command line options into a sql statement that will
    # find the desired entries.
    sql, sqlargs = opts2sql(args, opts)
    if opts.debug:
        print("%s %r" % (sql, sqlargs))

    # 'entries' is a list of entry objects; 'tables' is a dictionary
    # keyed by table name whose values are all the rows retrieved
    # from that table.
    entries, tables = jdb.entrList(cur, sql, sqlargs, ret_tuple=True)

    # Xrefs in the retrieved entries carry only the id numbers of the
    # referenced entries.  Augment them (and the reverse xrefs) so the
    # referenced entry's kanji, glosses, etc. can be shown.
    jdb.augment_xrefs(cur, tables['xref'])
    jdb.augment_xrefs(cur, tables['xrer'], rev=1)
    jdb.add_xsens_lists(tables['xref'])
    jdb.mark_seq_xrefs(cur, tables['xref'])

    # Print each entry in the requested format, with a blank line
    # between (not before or after) entries.
    for index, entry in enumerate(entries):
        text = fmtjel.entr(entry) if opts.jel else fmt.entr(entry)
        if index:
            print()
        print(text)

    if not entries:
        print("No entries found")
def render_day_updates(y, m, d, n, formvalues):
    """Render the entries that were modified on one specific day.

    y, m, d -- Year, month and day of the date of interest.
    n -- If non-zero, the date is moved backwards by 'n' days.  Most
        often this is 1, together with today's date, to show the
        entries updated "yesterday"; it works for any date and value.
    formvalues -- An 8-item sequence unpacked as (form, svc, dbg, cur,
        sid, sess, parms, cfg).

    The entries shown are those having a 'hist' row whose 'dt' falls
    on the chosen day.  They are rendered with the 'entr.jinja'
    template.
    """
    cur = formvalues[3]
    sql = ("SELECT DISTINCT e.id FROM entr e JOIN hist h on h.entr=e.id "
           "WHERE h.dt BETWEEN %s::timestamp AND %s::timestamp "
           "+ interval '1 day' ")
    day = datetime.date(y, m, d)
    if n:
        day -= datetime.timedelta(n)
        # Kept from the original; y, m, d are not read again below.
        y, m, d = day.year, day.month, day.day
    entries = jdb.entrList(cur, sql, (day, day), 'x.src,x.seq,x.id')

    # Prepare the entries for display: augment the xrefs so that the
    # xref seq# and kanji/reading texts can be shown rather than just
    # an entry id number.  Do the same for sounds.
    for entry in entries:
        for sense in entry._sens:
            if hasattr(sense, '_xref'):
                jdb.augment_xrefs(cur, sense._xref)
            if hasattr(sense, '_xrer'):
                jdb.augment_xrefs(cur, sense._xrer, 1)
        if hasattr(entry, '_snd'):
            jdb.augment_snds(cur, entry._snd)
    cur.close()

    jmcgi.htmlprep(entries)
    jmcgi.add_filtered_xrefs(entries, rem_unap=True)

    form, svc, dbg, cur, sid, sess, parms, cfg = formvalues
    # The template receives (entry, None) pairs.
    paired = zip(entries, [None] * len(entries))
    jmcgi.jinja_page('entr.jinja', entries=paired, disp=None,
                     svc=svc, dbg=dbg, sid=sid, session=sess, cfg=cfg,
                     parms=parms, this_page='updates.py')
def _get_text_from_database(seq, src):
    """Return the JEL text of entry 'seq' in corpus 'src', minus its first line.

    The entry is read from the 'jmdict' database.  If no entry is
    found a message is printed and None is returned.
    """
    cursor = jdb.dbOpen('jmdict')
    KW = jdb.KW  # Not used below; retained from the original code.
    query = "SELECT id FROM entr WHERE seq=%s AND src=%s"
    found = jdb.entrList(cursor, query, [seq, src])
    if not found:
        print("Entry %s not found" % seq)
        return None
    entry = found[0]
    for sense in entry._sens:
        jdb.augment_xrefs(cursor, getattr(sense, '_xref', []))
    # Discard everything up to and including the first newline.
    _first_line, _newline, remainder = fmtjel.entr(entry).partition('\n')
    return remainder
def _main(args, opts):
    """Interactively prompt for entry id numbers and print each entry.

    args, opts -- Unused; present for the standard main() signature.

    The loop ends on an empty reply or end-of-file.  A reply that is
    not an integer is reported and the prompt is repeated.
    """
    cur = jdb.dbOpen('jmdict')
    while True:
        try:
            reply = input("Id number? ")
        except EOFError:
            reply = None
        if not reply:
            break
        # Convert once, up front: input() returns a str, which can be
        # neither used as an id nor formatted with %d.
        try:
            entr_id = int(reply)
        except ValueError:
            print("Invalid id number: %r" % reply)
            continue
        entries, raw = jdb.entrList(cur, [entr_id], ret_tuple=True)
        jdb.augment_xrefs(cur, raw['xref'])
        if not entries:
            print("Entry id %d not found" % entr_id)
        else:
            print(entr(entries[0], compat=None))
def getentry(cur, seq, src):
    """Return the single editable entry identified by 'seq' and 'src'.

    cur -- An open DBAPI cursor to a JMdictDB database.
    seq -- Sequence number of entry to retrieve.
    src -- Corpus id number of entry to retrieve.

    Only entries with stat 'A', or with stat 'D' that are unapproved,
    are considered.

    Raises MultipleError if more than one entry matches (the user has
    to pick the right version manually), MissingError if none
    matches, and ChildError if the entry found has a 'dfrm' value.
    """
    KW = jdb.KW
    active_id = KW.STAT['A'].id
    deleted_id = KW.STAT['D'].id
    # The doubled %%s become %s placeholders for the db driver; the
    # two stat id numbers are interpolated right here.
    sql = ("SELECT id FROM entr WHERE seq=%%s AND src=%%s "
           "AND (stat=%s OR (stat=%s and unap))") % (active_id, deleted_id)
    entries, tables = jdb.entrList(cur, sql, (seq, src), ret_tuple=True)
    jdb.augment_xrefs(cur, tables['xref'])
    found = len(entries)
    if found > 1:
        raise MultipleError(seq)
    if found == 0:
        raise MissingError(seq)
    entry = entries[0]
    if entry.dfrm:
        raise ChildError(seq)
    return entry
def rt(_, seq):
    """Round-trip test of serialize.serialize() / serialize.unserialize().

    _ -- The test case object; its assert* methods are used.
    seq -- Sequence number of the entry (in corpus 1) to test with.

    The entry is read from the database, serialized, unserialized,
    and both the original and the reconstructed objects are converted
    to xml; the two xml texts must be equal.  (Watch out for order
    problems.)
    """
    # FIXME: reading database too slow, too volatile.
    #   Read from a test xml file instead.
    if not Cursor:
        globalSetup()
    # FIXME: don't hardwire corpus (aka src).
    sql = "SELECT id FROM entr WHERE seq=%s AND src=1"
    elist, r = jdb.entrList(Cursor, sql, [seq], ret_tuple=1)
    # Fail with a clear message, rather than an IndexError, when the
    # test entry is not in the database.
    _.assertTrue(elist, "no entry with seq %s found in corpus 1" % (seq,))
    e1 = elist[0]
    jdb.augment_xrefs(Cursor, r['xref'])
    s = serialize.serialize(e1)
    e2 = serialize.unserialize(s)
    f1 = fmtxml.entr(e1)
    # Sanity check to detect an empty entry.  assertTrue() replaces
    # the assert_() alias, which was removed in Python 3.12.
    _.assertTrue(len(f1) > 40)
    f2 = fmtxml.entr(e2)
    _.assertEqual(f1, f2)
def _roundtrip(cur, lexer, parser, seq, src):
    """Testing helper: convert an entry to JEL, parse it, convert it again.

    cur -- Open dbapi cursor to the database to read the entry from.
    lexer, parser -- The JEL lexer and parser objects to use.
    seq, src -- Sequence number and corpus id that identify the entry.

    Returns a 2-tuple of JEL texts: the one generated from the entry
    read from the database, and the one generated from the object the
    parser built from that first text.  The two should be identical.
    Returns (None, None) if no entry was found.
    """
    query = "SELECT id FROM entr WHERE seq=%s AND src=%s"
    found = jdb.entrList(cur, query, [seq, src])
    if not found:
        return None, None
    original = found[0]
    for sense in original._sens:
        jdb.augment_xrefs(cur, getattr(sense, '_xref', []))
    text_before = _get_jel_text(original)
    jellex.lexreset(lexer, text_before)
    parsed = parser.parse(text_before, lexer=lexer, tracking=True)
    resolv_xrefs(cur, parsed)
    text_after = _get_jel_text(parsed)
    return text_before, text_after
def check(self, seq):
    """Compare fmtjel's output for entry 'seq' with the expected text.

    seq -- Sequence number of the entry (in corpus 1) to test.  The
        expected text is read from "data/fmtjel/<seq>.txt".

    The first line of the generated JEL text is dropped before the
    comparison.
    """
    # Read expected text, remove any unicode BOM or trailing whitespace
    # that may have been added when editing.  The 'with' block closes
    # the file; startswith() also copes with an empty file.
    path = "data/fmtjel/" + str(seq) + ".txt"
    with open(path, encoding='utf-8') as f:
        expected = f.read().rstrip()
    if expected.startswith('\ufeff'):
        expected = expected[1:]
    # Read the entry from the database.  Be sure to get it from the
    # right corpus and get only the currently active entry.  Assert
    # that we received exactly one entry.
    sql = "SELECT id FROM entr WHERE src=1 AND seq=%s AND stat=2 AND NOT unap"
    entrs, data = jdb.entrList(Cur, sql, (seq, ), ret_tuple=True)
    self.assertEqual(1, len(entrs))
    # Add the annotations needed for displaying xrefs in condensed form.
    jdb.augment_xrefs(Cur, data['xref'])
    jdb.augment_xrefs(Cur, data['xrer'], rev=True)
    fmtjel.markup_xrefs(Cur, data['xref'])
    # Test fmtjel by having it convert the entry to JEL.
    resulttxt = fmtjel.entr(entrs[0]).splitlines(True)
    # Confirm that the received text matches the expected text.
    if resulttxt:
        resulttxt = ''.join(resulttxt[1:])
    # assertTrue() replaces the assert_() alias, which was removed in
    # Python 3.12.
    self.assertTrue(10 < len(resulttxt))
    msg = "\nExpected:\n%s\nGot:\n%s" % (expected, resulttxt)
    self.assertEqual(expected, resulttxt, msg)
def submission(dbh, entr, disp, errs, is_editor=False, userid=None):
    # Add a changed entry, 'entr', to the jmdictdb database accessed
    # by the open DBAPI cursor, 'dbh'.
    #
    # dbh -- An open DBAPI cursor.
    # entr -- A populated Entr object that defines the entry to
    #   be added.  See below for a description of how some of its
    #   attributes affect the submission.
    # disp -- Disposition, one of three string values:
    #   '' -- Submit as normal user.
    #   'a' -- Approve this submission.
    #   'r' -- Reject this submission.
    # errs -- A list to which any error messages will be appended.
    #   Note that if the error message contains html it should be
    #   wrapped in jmcgi.Markup() to prevent it from being escaped
    #   in the template.  Conversely, error messages that contain
    #   text from user input should NOT be so wrapped since they
    #   must be escaped in the template.
    # is_editor -- True if this submission is being performed by
    #   a logged in editor.  Approved or Rejected dispositions will
    #   fail if this is false.  Its value may be conveniently
    #   obtained from jmcgi.is_editor().  False if a normal user.
    # userid -- The userid if the submitter is a logged in editor,
    #   or None if not.
    #
    # Returns the value returned by submit(), approve() or reject()
    # when no errors were recorded in 'errs'; otherwise None.
    # Changes are not committed; the caller is expected to do that.
    #
    # Note that we never modify existing database entries other
    # than to sometimes completely erase them.  Submissions
    # of all three types (submit, approve, reject) *always*
    # result in the creation of a new entry object in the database.
    # The new entry will be created by writing 'entr' to the
    # database.  The following attributes in 'entr' are relevant:
    #
    #   entr.dfrm -- If None, this is a new submission.  Otherwise,
    #     it must be the id number of the entry this submission
    #     is an edit of.
    #   entr.stat -- Must be consistent with changes requested.  In
    #     particular, if it is 4 (Delete), changes made in 'entr'
    #     will be ignored, and a copy of the parent entry will be
    #     submitted with stat D.
    #   entr.src -- Required to be set, new entry will copy.
    #     # FIXME: prohibit non-editors from making src
    #     #  different than parent?
    #   entr.seq -- If set, will be copied.  If not set, submission
    #     will get a new seq number but this is untested and very
    #     likely to break something.
    #     # FIXME: prohibit non-editors from making seq number
    #     #  different than parent, or non-null if no parent?
    #   entr.hist -- The last hist item on the entry will supply
    #     the comment, email and name fields to a newly constructed
    #     comment that will replace it in the database.  The time-
    #     stamp and diff are regenerated and the userid field is
    #     set from our userid parameter.
    #     # FIXME: should pass history record explicitly so that
    #     #  we can be sure if the caller is or is not supplying
    #     #  one.  That will make it easier to use this function
    #     #  from other programs.
    # The following entry attributes need not be set:
    #   entr.id -- Ignored (reset to None).
    #   entr.unap -- Ignored (reset based on 'disp').
    # Additionally, if 'is_editor' is false, the rdng._freq and
    # kanj._freq items will be copied from the parent entr rather
    # than using the ones supplied on 'entr'.  See jdb.copy_freqs()
    # for details about how the copy works when the rdng's or kanj's
    # differ between the parent and 'entr'.

    KW = jdb.KW
    log = L('cgi.edsubmit.submission')
    log.info(
        ("disp=%s, is_editor=%s, userid=%s, entry id=%s,\n"
         + " " * 36 + "parent=%s, stat=%s, unap=%s, seq=%s, src=%s")
        % (disp, is_editor, userid, entr.id, entr.dfrm, entr.stat,
           entr.unap, entr.seq, entr.src))
    log.info("entry text: %s %s"
             % ((';'.join(k.txt for k in entr._kanj)),
                (';'.join(r.txt for r in entr._rdng))))
    log.debug("seqset: %s" % logseq(dbh, entr.seq, entr.src))

    # Submissions, approvals and rejections will always produce a
    # new db entry object so nuke any id number.
    entr.id = None
    entr.unap = not disp
    merge_rev = False
    if not entr.dfrm:
        # This is a submission of a new entry.
        entr.stat = KW.STAT['A'].id
        entr.seq = None     # Force addentr() to assign seq number.
        pentr = None        # No parent entr.
        edtree = None
    else:
        # Modification of existing entry.
        edroot = get_edroot(dbh, entr.dfrm)
        edtree = get_subtree(dbh, edroot)
        # Get the parent entry and augment the xrefs so when hist diffs
        # are generated, they will show xref details.
        log.debug("reading parent entry %d" % entr.dfrm)
        pentr, raw = jdb.entrList(dbh, None, [entr.dfrm], ret_tuple=True)
        if len(pentr) != 1:
            log.debug("missing parent %d" % entr.dfrm)
            # The editset may have changed between the time our user
            # displayed the Confirmation screen and they clicked the
            # Submit button.  Changes involving unapproved edits result
            # in the addition of entries and don't alter the preexisting
            # tree shape.  Approvals of edits, deletes or rejects may
            # affect our subtree and if so will always manifest themselves
            # as the disappearance of our parent entry.
            # The corpus keyword is looked up from 'entr.src'.  (The
            # original referenced an undefined name here, which raised
            # NameError instead of reporting this error.)
            errs.append(
                "The entry you are editing no loger exists because it "
                "was approved, deleted or rejected. "
                "Please search for entry '%s' seq# %s and reenter your changes "
                "if they are still applicable."
                % (KW.SRC[entr.src].kw, entr.seq))
            return None
        pentr = pentr[0]
        jdb.augment_xrefs(dbh, raw['xref'])

        if entr.stat == KW.STAT['D'].id:
            # If this is a deletion, set 'merge_rev'.  It tells the
            # history merge below to return the edited entry's parent
            # rather than the edited entry itself: when doing a delete
            # we do not want to make any changes to the entry, even if
            # the submitter has done so.
            merge_rev = True

    # The history merge (the jdb.add_hist() call below) combines the
    # history entry in the submitted entry with all the previous
    # history records in the parent entry, so that the new entry will
    # have a continuous history.  It also returns an entry: if
    # 'merge_rev' is false that is 'entr'; if 'merge_rev' is true it
    # is the entry pointed to by 'entr.dfrm' (i.e. the original entry
    # that the submitter edited), so that any edits the submitter made
    # on a delete request are ignored.

    # Before merging, check for conditions that would cause the merge
    # to fail.
    if entr.stat == KW.STAT['D'].id and not getattr(entr, 'dfrm', None):
        log.debug("delete of new entry error")
        errs.append("Delete requested but this is a new entry.")

    if disp == 'a' and has_xrslv(entr) and entr.stat == KW.STAT['A'].id:
        log.debug("unresolved xrefs error")
        errs.append("Can't approve because entry has unresolved xrefs")

    if not errs:
        # If this is a submission by a non-editor, restore the
        # original entry's freq items which non-editors are not
        # allowed to change.
        if not is_editor:
            if pentr:
                log.debug("copying freqs from parent")
                jdb.copy_freqs(pentr, entr)
            # Note that non-editors can provide freq items on new
            # entries.  We expect an editor to vet this when approving.

        # Entr contains the hist record generated by edconf.py
        # but it is not trustworthy since it could be modified or
        # created from scratch before we get it.  So we extract
        # the unvalidated info from it (name, email, notes, refs)
        # and recreate it.
        h = entr._hist[-1]
        # When we get here, if merge_rev is true, pentr will also be
        # true.  If we are wrong, add_hist() will throw an exception
        # but will never return a None, so no need to check return val.
        log.debug("adding hist for '%s', merge=%s" % (h.name, merge_rev))
        entr = jdb.add_hist(entr, pentr, userid,
                            h.name, h.email, h.notes, h.refs, merge_rev)

    if not errs:
        # Occasionally, often from copy-pasting, a unicode BOM
        # character finds its way into one of an entry's text
        # strings.  We quietly remove any here.
        n = jdb.bom_fixall(entr)
        if n > 0:
            log.debug("removed %s BOM character(s)" % n)

    if not errs:
        if not disp:
            added = submit(dbh, entr, edtree, errs)
        elif disp == "a":
            added = approve(dbh, entr, edtree, errs)
        elif disp == "r":
            added = reject(dbh, entr, edtree, errs, None)
        else:
            log.debug("bad url parameter (disp=%s)" % disp)
            errs.append("Bad url parameter (disp=%s)" % disp)
    log.debug("seqset: %s" % logseq(dbh, entr.seq, entr.src))
    if not errs:
        return added
    # Note that changes have not been committed yet, caller is
    # expected to do that.
    return None
def get_entrs(dbh, elist, qlist, errs, active=None, corpus=None):
    """Retrieve Entr objects selected by entry id and/or seq numbers.

    dbh -- Open dbapi cursor to the current database.
    elist -- List of id numbers of entries to get.  Each number
        may be either an integer or a string.
    qlist -- List of seq numbers of entries to get.  Each seq
        number may be an integer or a string.  If the latter
        it may be followed by a period and a corpus identifier,
        which is either the corpus id number or the corpus name.
    errs -- Must be a list (or other append()able object) to
        which any error messages will be appended.
    active -- If 1, only active/approved or new (unapproved)
        entries will be retrieved.
        If 2, at most one entry is returned for each seq number:
        the most recently edited active entry, chronologically
        based on its history records (if any).
        If active is any other value or not present, all entries
        meeting the entry-id, seq, or seq-corpus criteria will be
        retrieved.
    corpus -- If not None, this is a corpus id number or name
        and will apply to any seq numbers without an explicit
        corpus given with the number.

    If the same entry is specified more than once in 'elist' and/or
    'qlist' it will only occur once in the returned object list.
    Objects in the returned list are in no particular order.

    Returns [] when 'corpus' is not recognised and None when 'errs'
    is non-empty after the arguments have been checked.
    """
    id_args, seq_args, extra_args = [], [], []
    conditions = []
    corpid = None
    if corpus is not None:
        corpid = corp2id(corpus)
        if corpid is None:
            errs.append("Bad corpus parameter: %s" % corpus)
            return []

    for raw_id in (elist or []):
        try:
            id_args.append(str2eid(str(raw_id)))
        except ValueError:
            errs.append("Bad url parameter received: " + esc(raw_id))
    if id_args:
        placeholders = ','.join(['%s'] * len(id_args))
        conditions.append("id IN (" + placeholders + ")")

    for raw_seq in (qlist or []):
        try:
            parsed = list(str2seq(str(raw_seq)))
        except ValueError:
            errs.append("Bad parameter received: " + esc(raw_seq))
            continue
        # A seq number given without a corpus gets the default one.
        if corpus and not parsed[1]:
            parsed[1] = corpid
        if parsed[1]:
            conditions.append("(seq=%s AND src=%s)")
            seq_args.extend(parsed)
        else:
            conditions.append("seq=%s")
            seq_args.append(parsed[0])

    if not conditions:
        errs.append("No valid entry or seq numbers given.")
    if errs:
        return None

    try:
        active = int(active)
    except (ValueError, TypeError):
        pass

    status_clause = distinct = hist_join = order = ''
    if active == 1:
        # Restrict returned rows to active/approved (stat=A and not
        # unap) or new (dfrm is NULL); that is, the result set will
        # not include any stat=D or stat=R results.
        status_clause = " AND stat=%s AND (NOT unap OR dfrm IS NULL)"
        extra_args.append(jdb.KW.STAT['A'].id)
    elif active == 2:
        # Restrict returned rows to active (no stat=D or stat=R results)
        # and most recent edit as determined by the history records (if
        # any).  In no event will more than one entry per seq number be
        # returned.  Note that this will necessarily return the edit from
        # only one branch when multiple branches exist, which may surprise
        # a user when the returned entry shows no signs of a recent edit
        # known to have been made.
        # Example of generated sql:
        #   SELECT DISTINCT ON (e.seq) e.id FROM entr e
        #     LEFT JOIN hist h ON h.entr=e.id
        #     WHERE e.seq=2626330 and e.stat=2
        #     ORDER BY e.seq,h.dt DESC NULLS LAST;
        status_clause = " AND e.stat=%s"
        extra_args.append(jdb.KW.STAT['A'].id)
        distinct = " DISTINCT ON (e.seq)"
        hist_join = " LEFT JOIN hist h ON h.entr=e.id"
        # "NULLS LAST" is needed because some entries (e.g., entries
        # imported when JMdictDB is first initialized and never edited)
        # may not have history records, which results in 'dt' values
        # of NULL; we want those entries last.
        order = " ORDER BY e.seq,h.dt DESC NULLS LAST"

    sql = "SELECT%s e.id FROM entr e %s WHERE (%s)%s%s" % (
        distinct, hist_join, " OR ".join(conditions), status_clause, order)
    sql_args = id_args + seq_args + extra_args
    entries, tables = jdb.entrList(dbh, sql, sql_args, ret_tuple=True)
    if entries:
        jdb.augment_xrefs(dbh, tables['xref'])
        jdb.augment_xrefs(dbh, tables['xrer'], rev=1)
        jdb.add_xsens_lists(tables['xref'])
        jdb.mark_seq_xrefs(dbh, tables['xref'])
    return entries
def main(args, opts):
    """Build and display the edit confirmation page ("edconf.jinja").

    args, opts -- Not read by this function.

    The form values are parsed into a new Entr object, which is
    completed with details from the parent entry (if any), checked
    for errors and warnings, and passed to the confirmation template.
    Problems are reported through jmcgi.err_page(); the code after
    each such call relies on it not returning.
    """
    jdb.reset_encoding(sys.stdout, 'utf-8')
    errs = []
    chklist = {}
    try:
        form, svc, dbg, cur, sid, sess, parms, cfg = jmcgi.parseform()
    except Exception as e:
        jmcgi.err_page([str(e)])

    fv = form.getfirst
    KW = jdb.KW

    # 'eid' will be an integer if we are editing an existing
    # entry, or undefined if this is a new entry.
    pentr = None
    eid = url_int('id', form, errs)
    if eid:
        # Get the parent entry of the edited entry.  This is what the
        # edited entry will be diff'd against for the history record.
        # It is also the entry that will be pointed to by the edited
        # entry's 'dfrm' field.
        pentr = jdb.entrList(cur, None, [eid])
        #FIXME: Need a better message with more explanation.
        if not pentr:
            errs.append("The entry you are editing has been deleted.")
        else:
            pentr = pentr[0]

    # Desired disposition: 'a':approve, 'r':reject, undef:submit.
    disp = url_str('disp', form)
    if disp != 'a' and disp != 'r' and disp != '' and disp is not None:
        errs.append("Invalid 'disp' parameter: '%s'" % disp)

    # New status is A for edit of existing or new entry, D for
    # deletion of existing entry.
    delete = fv('delete')
    makecopy = fv('makecopy')
    if delete and makecopy:
        errs.append(
            "The 'delete' and 'treat as new'"
            " checkboxes are mutually exclusive; please select only one.")
    if makecopy:
        eid = None
    # FIXME: we need to disallow new entries with corp.seq
    #  that matches an existing A, A*, R*, D*, D? entry.
    #  Do same check in submit.py.

    seq = url_int('seq', form, errs)
    src = url_int('src', form, errs)
    notes = url_str('notes', form)
    srcnote = url_str('srcnote', form)

    # These are the JEL (JMdict Edit Language) texts which
    # we will concatenate into a string that is fed to the
    # JEL parser which will create an Entr object.
    kanj = (stripws(url_str('kanj', form))).strip()
    rdng = (stripws(url_str('rdng', form))).strip()
    sens = (url_str('sens', form)).strip()
    intxt = "\f".join((kanj, rdng, sens))
    grpstxt = url_str('grp', form)

    # Get the meta-edit info which will go into the history
    # record for this change.
    comment = url_str('comment', form)
    refs = url_str('reference', form)
    name = url_str('name', form)
    email = url_str('email', form)

    if errs:
        jmcgi.err_page(errs)

    # Parse the entry data.  We do the parse even if the request is
    # to delete the entry (is this the right thing to do???) since on
    # the edconf page we want to display what the entry was.  The
    # edsubmit page will do the actual deletion.
    entr, errs = parse(intxt)
    # 'errs' is a list which, if not empty, has a single item which is
    # a 2-seq: (error-type, error-message).
    if errs or not entr:
        if not entr and not errs:
            # Must have the same shape as the parser's error list, a
            # list holding one 2-seq, because it is indexed as
            # errs[0][...] just below.  (The original assigned a bare
            # tuple here, which made errs[0][1] raise IndexError.)
            errs = [([], "Unable to create an entry from your input.")]
        jmcgi.err_page([errs[0][1]], prolog=errs[0][0], cssclass="errormsg")

    entr.dfrm = eid
    entr.unap = not disp

    # To display the xrefs and reverse xrefs in html, they need to be
    # augmented with additional info about their targets.
    # collect_xrefs() simply returns a list of the Xref objects that
    # are on the entr argument's forward xref lists if rev is not
    # true, or on its reverse xref lists (._xrer) if rev is true.
    # This does not remove them from the entry and is done simply for
    # convenience so augment_xrefs() can process them all in one shot.
    # augment_xrefs() adds an attribute, .TARG, to each Xref object
    # whose value is an Entr object for the entry the xref points to
    # if rev is not true, or the entry the xref is from if rev is
    # true.  These Entr objects can be used to display info about the
    # xref target or source such as seq#, reading or kanji.  See
    # jdb.augment_xrefs() for details.
    # Note that the lists collected below contain references to the
    # xrefs on the entries; thus the augmentation done by
    # jdb.augment_xrefs() alters the xref objects on those entries.
    if pentr:
        x = jdb.collect_xrefs([pentr])
        if x:
            jdb.augment_xrefs(cur, x)
        # Although we don't allow editing of an entry's reverse
        # xref, we still augment them (on the parent entry)
        # because we will display them.
        x = jdb.collect_xrefs([pentr], rev=True)
        if x:
            jdb.augment_xrefs(cur, x, rev=True)
    x = jdb.collect_xrefs([entr])
    if x:
        jdb.augment_xrefs(cur, x)

    if delete:
        # Ignore any content changes made by the submitter by
        # restoring original values to the new entry.
        entr.seq = pentr.seq
        entr.src = pentr.src
        entr.stat = KW.STAT['D'].id
        entr.notes = pentr.notes
        entr.srcnote = pentr.srcnote
        entr._kanj = getattr(pentr, '_kanj', [])
        entr._rdng = getattr(pentr, '_rdng', [])
        entr._sens = getattr(pentr, '_sens', [])
        entr._snd = getattr(pentr, '_snd', [])
        entr._grp = getattr(pentr, '_grp', [])
        entr._cinf = getattr(pentr, '_cinf', [])
    else:
        # Migrate the entr details to the new entr object
        # which to this point has only the kanj/rdng/sens
        # info provided by jbparser.
        entr.seq = seq
        entr.src = src
        entr.stat = KW.STAT['A'].id
        entr.notes = notes
        entr.srcnote = srcnote
        entr._grp = jelparse.parse_grp(grpstxt)

        # This form and the JEL parser provide no way to change
        # some entry attributes such as _cinf, _snd, reverse xrefs
        # and for non-editors, _freq.  We need to copy these items
        # from the original entry to the new, edited entry to avoid
        # losing them.  The copy can be shallow since we won't be
        # changing the copied content.
        if pentr:
            if not jmcgi.is_editor(sess):
                jdb.copy_freqs(pentr, entr)
            if hasattr(pentr, '_cinf'):
                entr._cinf = pentr._cinf
            copy_snd(pentr, entr)

            # Copy the reverse xrefs that are on pentr to entr,
            # removing any that are no longer valid because they
            # refer to senses, readings or kanji no longer present
            # on the edited entry.  Note that these have already
            # been augmented above.
            nuked_xrers = realign_xrers(entr, pentr)
            if nuked_xrers:
                chklist['xrers'] = format_for_warnings(nuked_xrers, pentr)

        # Add sound details so confirm page will look the same as the
        # original entry page.  Otherwise, the confirm page will display
        # only the sound clip id(s).
        #FIXME? Should the following snd augmentation stuff be outdented
        #  one level so that it is done in both the delete and non-delete
        #  paths?
        snds = list(getattr(entr, '_snd', []))
        for r in getattr(entr, '_rdng', []):
            snds.extend(getattr(r, '_snd', []))
        if snds:
            jdb.augment_snds(cur, snds)

        # If any xrefs were given, resolve them to actual entries
        # here since that is the form used to store them in the
        # database.  If any are unresolvable, an appropriate error
        # is saved and will be reported later.
        rslv_errs = jelparse.resolv_xrefs(cur, entr)
        if rslv_errs:
            chklist['xrslv'] = rslv_errs

    if errs:
        jmcgi.err_page(errs)

    # Append a new hist record detailing this edit.
    if not hasattr(entr, '_hist'):
        entr._hist = []
    entr = jdb.add_hist(entr, pentr, sess.userid if sess else None,
                        name, email, comment, refs,
                        entr.stat == KW.STAT['D'].id)
    if not delete:
        check_for_errors(entr, errs)
        if errs:
            jmcgi.err_page(errs)

    pseq = pentr.seq if pentr else None
    check_for_warnings(cur, entr, pseq, chklist)

    # The following all expect a list of entries.
    jmcgi.add_filtered_xrefs([entr], rem_unap=False)
    serialized = serialize.serialize([entr])
    jmcgi.htmlprep([entr])

    entrs = [[entr, None]]  # Package 'entr' as expected by entr.jinja.
    jmcgi.jinja_page("edconf.jinja",
                     entries=entrs, serialized=serialized,
                     chklist=chklist, disp=disp, parms=parms,
                     svc=svc, dbg=dbg, sid=sid, session=sess, cfg=cfg,
                     this_page='edconf.py')
def main(args, opts):
    """Write database entries to an XML file (or stdout).

    args -- Positional command line arguments.  If non-empty, args[0]
        is the name of the output file; otherwise output goes to
        sys.stdout.
    opts -- Command line options.  Attributes read here: debug,
        database, root, compat, nodtd, encoding, seqfile, corpus,
        begin, count and blocksize.  'opts.root' is set here when it
        was not given.

    Entries are read and written in blocks of 'opts.blocksize'.
    Progress and timing information is written to sys.stderr.  Exits
    with status 1 if no starting entry can be found.
    """
    global Debug
    Debug = opts.debug

    # Open the database.  jdb.dbopts() extracts the db-related
    # options from the command line options in 'opts'.
    cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))

    # If no "--root" option was supplied, choose a default based
    # on the value of the "--compat" option.
    if not opts.root:
        if opts.compat in ('jmnedict', 'jmneold'):
            opts.root = 'JMnedict'
        else:
            opts.root = 'JMdict'

    outf = None
    if not opts.nodtd:
        # Choose a dtd to use based on the "--compat" option.
        # The dtd file is expected to be located somewhere in the
        # pythonpath (sys.path) directories.
        if opts.compat == 'jmdict':
            dtd = "dtd-jmdict.xml"
        elif opts.compat == 'jmdicthist':
            dtd = "dtd-jmdict.xml"
        elif opts.compat == 'jmnedict':
            dtd = "dtd-jmnedict.xml"
        elif opts.compat == 'jmneold':
            dtd = "dtd-jmneold.xml"
        else:
            dtd = "dtd-jmdict-ex.xml"
        dtddir = jdb.find_in_syspath(dtd)
        dtdfn = dtddir + "/" + dtd      # Fully qualified dtd file name.

        # jdb.get_dtd() reads the dtd text, and replaces the root
        # element name and encoding with the values supplied
        # in the arguments.
        dtdtxt = jdb.get_dtd(dtdfn, opts.root, opts.encoding)
        if len(args) == 0:
            outf = sys.stdout
        else:
            outf = open(args[0], "w")
        jdb.reset_encoding(outf, opts.encoding)
        outf.write(dtdtxt)

    if opts.seqfile:
        #FIXME: we should read these incrementally.
        # The file holds seq numbers separated by spaces or newlines.
        if opts.seqfile == '-':
            entrlist = [int(x) for x in sys.stdin.read().split()]
        else:
            with open(opts.seqfile) as f:
                entrlist = [int(x) for x in f.read().split()]

    # Turn the "--corpus" option value into a string that can be
    # and'ed into a SQL WHERE clause to restrict the results to
    # the specified corpora.
    corp_terms = parse_corpus_opt(opts.corpus, 'e.src')

    # If the output file was not opened in the dtd section
    # above, open it now.  We postpone opening it until the
    # last possible moment to avoid creating it and then
    # bombing because there was a typo in the input or dtd
    # filename, etc.
    # FIXME: Should do a "write" function that opens the
    #  file just before writing.
    if not outf:
        if len(args) == 0:
            outf = sys.stdout
        else:
            outf = open(args[0], "w")

    # If compat (jmdict or jmnedict), restrict the xml to Active
    # entries only.
    whr_act = (" AND NOT unap AND stat=" + str(jdb.KW.STAT['A'].id)
               if opts.compat else "")

    if opts.begin:
        # If a "--begin" sequence number was given, we need to read
        # the entr record so we can get the src id number.  Complain
        # and exit if not found.  Complain if more than one entry
        # with the requested seq number exists.  More than one may be
        # found since the same sequence number may exist in different
        # corpora, or in the same corpus if an entry was edited.
        #
        #FIXME: no way to select from multiple entries with same seq
        #  number.  Might want just the stat="A" entries for example.
        sql = "SELECT id,seq,src FROM entr e WHERE seq=%s%s%s ORDER BY src" \
              % (int(opts.begin), corp_terms, whr_act)
        if Debug:
            print(sql, file=sys.stderr)
        start = time.time()
        rs = jdb.dbread(cur, sql)
        if Debug:
            print("Time: %s (init read)" % (time.time() - start),
                  file=sys.stderr)
        if not rs:
            print("No entry with seq '%s' found" % opts.begin,
                  file=sys.stderr)
            sys.exit(1)
        if len(rs) > 1:
            print("Multiple entries having seq '%s' found, results "
                  "may not be as expected. Consider using -s to "
                  "restrict to a single corpus." % (opts.begin),
                  file=sys.stderr)
        lastsrc, lastseq, lastid = rs[0].src, rs[0].seq, rs[0].id

    if not opts.begin and not opts.seqfile:
        # If no "--begin" option, remove the " AND" from the front of
        # the 'corp_terms' string.  Read the first entry (by seq number)
        # in the requested corpora.
        cc = corp_terms[4:] if corp_terms else 'True'
        sql = ("SELECT id,seq,src FROM entr e WHERE %s%s "
               "ORDER BY src,seq LIMIT 1" % (cc, whr_act))
        start = time.time()
        if Debug:
            print(sql, file=sys.stderr)
        rs = jdb.dbread(cur, sql)
        if Debug:
            print("Time: %s (init read)" % (time.time() - start),
                  file=sys.stderr)
        if not rs:
            # Nothing matches the selection; report it the same way
            # the "--begin" case does rather than failing on rs[0].
            print("No entries found", file=sys.stderr)
            sys.exit(1)
        lastsrc, lastseq, lastid = rs[0].src, rs[0].seq, rs[0].id

    # Add an enclosing root element only if we are also including
    # a DTD (ie, producing a full XML file).  Otherwise, the file
    # generated will just be a list of <entr> elements.
    if not opts.nodtd:
        if opts.compat:
            # Add a date comment...
            today = time.strftime("%Y-%m-%d", time.localtime())
            outf.write("<!-- %s created: %s -->\n" % (opts.root, today))
        outf.write('<%s>\n' % opts.root)

    entrlist_loc = 0
    count = opts.count
    done = 0
    blksize = opts.blocksize
    corpora = set()

    while count is None or count > 0:
        if opts.seqfile:
            seqnums = tuple(entrlist[entrlist_loc:entrlist_loc + blksize])
            if not seqnums:
                break
            entrlist_loc += blksize
            #FIXME: need detection of non-existent seq#s.
            sql = ("SELECT id FROM entr e WHERE seq IN %s"
                   + corp_terms + whr_act)
            sql_args = [seqnums]
            if Debug:
                print(sql, sql_args, file=sys.stderr)
            start = time.time()
            tmptbl = jdb.entrFind(cur, sql, sql_args)
        else:
            # In this loop we read blocks of 'blksize' entries.  Each
            # block read is ordered by entr src (i.e. corpus), seq, and
            # id.  The block to read is specified in the WHERE clause,
            # which is effectively:
            #   WHERE ((e.src=lastsrc AND e.seq=lastseq AND e.id>=lastid+1)
            #           OR (e.src=lastsrc AND e.seq>lastseq)
            #           OR e.src>lastsrc)
            # and (lastsrc, lastseq, lastid) are from the last entry in
            # the last block read.
            whr = "WHERE ((e.src=%%s AND e.seq=%%s AND e.id>=%%s) " \
                  "OR (e.src=%%s AND e.seq>%%s) " \
                  "OR e.src>%%s) %s%s" % (corp_terms, whr_act)
            limit = blksize if count is None else min(blksize, count)
            sql = "SELECT e.id FROM entr e" \
                  " %s ORDER BY src,seq,id LIMIT %d" % (whr, limit)
            # The following args will be substituted for the "%s"
            # placeholders (written "%%s" above) by jdb.entrFind().
            sql_args = [lastsrc, lastseq, lastid,
                        lastsrc, lastseq, lastsrc]
            # Create a temporary table of id numbers and give that to
            # jdb.entrList().  This is an order of magnitude faster than
            # giving the above sql directly to entrList().
            if Debug:
                print(sql, sql_args, file=sys.stderr)
            start = time.time()
            tmptbl = jdb.entrFind(cur, sql, sql_args)
        mid = time.time()
        entrs, raw = jdb.entrList(cur, tmptbl, None,
                                  ord="src,seq,id", ret_tuple=True)
        end = time.time()
        if Debug:
            print("read %d entries" % len(entrs), file=sys.stderr)
            print("Time: %s (entrFind), %s (entrList)"
                  % (mid - start, end - mid), file=sys.stderr)
        if not entrs:
            if opts.seqfile:
                # None of this block's seq numbers matched an entry,
                # but later blocks may; the seqfile loop ends only
                # when the list of seq numbers is exhausted.
                continue
            break
        write_entrs(cur, entrs, raw, corpora, opts, outf)

        # Update the 'last*' variables for the next time through
        # the loop.  Also, decrement 'count', if we are counting.
        lastsrc = entrs[-1].src
        lastseq = entrs[-1].seq
        lastid = entrs[-1].id + 1
        if count is not None:
            count -= blksize
        done += len(entrs)
        if not Debug:
            sys.stderr.write('.')
        else:
            print("%d entries written" % done, file=sys.stderr)

    if not opts.nodtd:
        outf.write('</%s>\n' % opts.root)
    # Close (and thereby flush) an output file that we opened.
    if outf is not sys.stdout:
        outf.close()
    if not Debug:
        sys.stderr.write('\n')
    print("Wrote %d entries" % done, file=sys.stderr)