Esempi in Python per entrList, esempi in Python per jdb.entrList

Esempio n. 1

0

Mostra file

File: dbreaper.py Progetto: cobysy/jmdictdb

def find_entries(cur, stat, sopts='', interval='30 days'):
    # Select old entries having one of the given statuses and load them.
    #
    # cur -- An open psycopg2 cursor to a JMdictDB database.
    # stat -- A sequence of kwstat.id numbers.  Only entries whose
    #       'stat' column holds one of these values are processed
    #       (4 is "deleted", 6 is "rejected").
    # sopts -- A string, described in the help for --corpus, giving
    #       the corpora to be processed.
    # interval -- A string giving a Postgresql interval spec; only
    #       entries whose newest history timestamp is older than this
    #       are selected.
    #
    # Returns a 2-tuple: the entries from jdb.entrList() and the value
    # returned by jdb.entrFind() (named 'tmptbl' by the original
    # author; presumably a temporary table -- confirm in jdb).

    corpus_clause = parse_corpus_opt(sopts, 'e.src')
    # The doubled '%%s' markers survive the Python-level substitution
    # of the corpus clause and become the '%s' placeholders that are
    # later filled in by the database driver.
    template = ("SELECT e.id "
                "FROM entr e "
                "JOIN hist h ON h.entr=e.id "
                "WHERE e.stat IN %%s AND NOT e.unap %s "
                "AND NOT EXISTS (SELECT 1 FROM entr WHERE dfrm=e.id) "
                "GROUP BY e.id "
                "HAVING MAX(dt)<(CURRENT_TIMESTAMP AT TIME ZONE 'utc'-%%s::INTERVAL) "
                "ORDER BY id")
    sql = template % corpus_clause
    # 'stat' must be passed as a tuple for use with an IN clause in
    # psycopg2; a list would be converted to a Postgresql array,
    # which won't work there.
    params = (tuple(stat), interval)
    tmptbl = jdb.entrFind(cur, sql, params)
    entrs, raw = jdb.entrList(cur, tmptbl, None,
                              ord="src,seq,id", ret_tuple=True)
    jdb.augment_xrefs(cur, raw['xref'])
    return entrs, tmptbl

Esempio n. 2

0

Mostra file

def main():
    # Small demo driver: read entry id 542 from the 'jmnew' database,
    # augment and mark up its xrefs, then print each formatted entry.
    cur = jdb.dbOpen('jmnew')
    entrs, data = jdb.entrList(cur, [542], ret_tuple=True)
    xrefs = data['xref']
    # Augment the xref rows twice: once plain, once with rev=1.
    jdb.augment_xrefs(cur, xrefs)
    jdb.augment_xrefs(cur, xrefs, rev=1)
    markup_xrefs(cur, xrefs)
    for e in entrs:
        print(entr(e))

Esempio n. 3

0

Mostra file

def main(args, opts):
    # Find entries matching the command line options and print them.
    #
    # args -- Positional command line arguments (handed to opts2sql()).
    # opts -- Parsed command line options; this function reads
    #       .database, .encoding, .debug and .jel from it.

    # Open the database and get a "cursor" for all subsequent database
    # operations.  As a side-effect this also creates a global variable
    # 'KW' in module 'jdb' holding the data read from all the keyword
    # tables (tables named "kw*"), read once at startup to avoid
    # repeated expensive trips to the database later.
    try:
        cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    except jdb.dbapi.OperationalError as e:
        print("Error, unable to connect to database, do you need -u or -p?\n",
              str(e), file=sys.stderr)
        sys.exit(1)

    # Output encoding: explicit option, else stdout's own, else utf-8.
    enc = opts.encoding or sys.stdout.encoding or 'utf-8'
    jdb.reset_encoding(sys.stdout, encoding=enc)

    # Turn the command line options into a sql statement (plus its
    # arguments) that will find the desired entries.
    sql, sqlargs = opts2sql(args, opts)
    if opts.debug:
        print("%s  %s" % (sql, repr(sqlargs)))

    # 'entrs' is a list of entry objects; 'raw' is a dictionary keyed
    # by table name whose values are the rows retrieved from that table.
    entrs, raw = jdb.entrList(cur, sql, sqlargs, ret_tuple=True)

    # Xrefs on the retrieved entries hold only the id numbers of the
    # referenced entries.  augment_xrefs() adds the extra information
    # needed to show the referenced entry's kanji, glosses, etc.  The
    # same is done for the reverse references.
    forward, reverse = raw['xref'], raw['xrer']
    jdb.augment_xrefs(cur, forward)
    jdb.augment_xrefs(cur, reverse, rev=1)
    jdb.add_xsens_lists(forward)
    jdb.mark_seq_xrefs(cur, forward)

    # Print the entries, separated from each other by one blank line.
    for position, e in enumerate(entrs):
        # Format according to the kind of output the user requested.
        txt = fmtjel.entr(e) if opts.jel else fmt.entr(e)
        if position > 0:
            print()
        print(txt)

    if not entrs:
        print("No entries found")

Esempio n. 4

0

Mostra file

def render_day_updates(y, m, d, n, formvalues):
    # Render the page listing the entries modified on a given date.
    #
    # y, m, d -- Year, month and day of the date of interest.
    # n -- If true, the number of days by which to move that date
    #       backwards.
    # formvalues -- 8-item sequence (form, svc, dbg, cur, sid, sess,
    #       parms, cfg); item 3 is the database cursor.
    #
    # Entr's are retrieved for any entries that have a 'hist' row with
    # a 'dt' date on that day and are displayed with the same entry
    # list template that is used for other "list of entries" results
    # (such as from the Search Results page).

    cur = formvalues[3]
    sql = '''SELECT DISTINCT e.id
                 FROM entr e
                 JOIN hist h on h.entr=e.id
                 WHERE h.dt BETWEEN %s::timestamp
                            AND %s::timestamp + interval '1 day' '''

    day = datetime.date(y, m, d)
    if n:
        # Most frequently 'n' is 1, used together with "today's" date
        # to get the entries updated "yesterday", but for consistency
        # it works for any date and any value of 'n'.
        day -= datetime.timedelta(days=n)
        y, m, d = day.year, day.month, day.day

    # The same date is bound to both placeholders of the BETWEEN range.
    sqlargs = (day, day)
    entries = jdb.entrList(cur, sql, sqlargs, 'x.src,x.seq,x.id')

    # Prepare the entries for display: augment the xrefs (so that the
    # xref seq# and kanji/reading texts can be shown rather than just
    # an entry id number) and do the same for sounds.
    for entry in entries:
        for sens in entry._sens:
            if hasattr(sens, '_xref'):
                jdb.augment_xrefs(cur, sens._xref)
            if hasattr(sens, '_xrer'):
                jdb.augment_xrefs(cur, sens._xrer, 1)
        if hasattr(entry, '_snd'):
            jdb.augment_snds(cur, entry._snd)
    cur.close()
    jmcgi.htmlprep(entries)
    jmcgi.add_filtered_xrefs(entries, rem_unap=True)

    # Unpack the remaining page parameters (this rebinds 'cur' too).
    form, svc, dbg, cur, sid, sess, parms, cfg = formvalues
    # The template receives (entry, None) pairs.
    paired = zip(entries, [None] * len(entries))
    jmcgi.jinja_page('entr.jinja',
                     entries=paired,
                     disp=None,
                     svc=svc,
                     dbg=dbg,
                     sid=sid,
                     session=sess,
                     cfg=cfg,
                     parms=parms,
                     this_page='updates.py')

Esempio n. 5

0

Mostra file

def _get_text_from_database(seq, src):
    # Read the entry identified by 'seq' and 'src' from the 'jmdict'
    # database and return its JEL text with the first line removed.
    # Prints a message and returns None when no entry is found.
    cur = jdb.dbOpen('jmdict')
    KW = jdb.KW  # Not used below; retained as in the original.
    query = "SELECT id FROM entr WHERE seq=%s AND src=%s"
    found = jdb.entrList(cur, query, [seq, src])
    if not found:
        print("Entry %s not found" % seq)
        return
    entry = found[0]
    # Augment each sense's xrefs; senses without any contribute an
    # empty list.
    for sens in entry._sens:
        jdb.augment_xrefs(cur, getattr(sens, '_xref', []))
    formatted = fmtjel.entr(entry)
    # Keep only what follows the first newline.
    _first_line, _newline, remainder = formatted.partition('\n')
    return remainder

Esempio n. 6

0

Mostra file

File: fmtxml.py Progetto: gabriel4649/JMdictDB

def _main(args, opts):
    # Interactive test loop: repeatedly prompt for an entry id number,
    # read that entry from the 'jmdict' database and print it formatted
    # by entr().  The loop ends on an empty answer or end-of-file.
    #
    # args, opts -- Not used by this function.
    cur = jdb.dbOpen('jmdict')
    while True:
        try:
            answer = input("Id number? ")
        except EOFError:
            answer = None
        if not answer: break
        # input() returns a string; convert it once and use the integer
        # from here on.  (Formatting the raw string with "%d" raises a
        # TypeError.)  A non-numeric answer gets a message and a new
        # prompt rather than a traceback.
        try:
            eid = int(answer)
        except ValueError:
            print("Not a valid id number: %s" % answer)
            continue
        e, raw = jdb.entrList(cur, [eid], ret_tuple=True)
        jdb.augment_xrefs(cur, raw['xref'])
        if not e:
            print("Entry id %d not found" % eid)
        else:
            txt = entr(e[0], compat=None)
            print(txt)

Esempio n. 7

0

Mostra file

File: bulkupd.py Progetto: gabriel4649/JMdictDB

def getentry(cur, seq, src):
    # Return the single entry identified by 'seq' and 'src'.
    #
    # cur -- An open DBAPI cursor to a JMdictDB database.
    # seq -- Sequence number of entry to retrieve.
    # src -- Corpus id number of entry to retrieve.
    #
    # Raises MultipleError if more than one entry matches (the user
    # must then fix the right version manually), MissingError if none
    # does, and ChildError if the entry found has a 'dfrm' value.
    # Entries that are rejected, or are deleted-approved, are ignored.
    KW = jdb.KW
    stat_ids = (KW.STAT['A'].id, KW.STAT['D'].id)
    # The stat ids are substituted here; the doubled '%%s' markers are
    # left as '%s' placeholders for the seq and src arguments.
    template = ("SELECT id FROM entr WHERE seq=%%s AND src=%%s "
                "AND (stat=%s OR (stat=%s and unap))")
    sql = template % stat_ids
    entries, raw = jdb.entrList(cur, sql, (seq, src), ret_tuple=True)
    jdb.augment_xrefs(cur, raw['xref'])
    count = len(entries)
    if count > 1:
        raise MultipleError(seq)
    if count == 0:
        raise MissingError(seq)
    found = entries[0]
    if found.dfrm:
        raise ChildError(seq)
    return found

Esempio n. 8

0

Mostra file

File: test_json.py Progetto: cobysy/jmdictdb

def rt(_, seq):
    # Test a round trip from an entry object through
    # serialize.serialize, serialize.unserialize, back to an
    # object.  The input and output objects are compared by
    # converting both to xml and comparing the text.
    # (Watch out for order problems).
    #
    # _ -- The test case object supplying the assert* methods.
    # seq -- Sequence number of the entry to test.

    # FIXME: reading database to slow, too volatile.
    #   read from a test xml file instead.
    if not Cursor: globalSetup()
    # FIXME: don't hardwire corpus (aka src).
    sql = "SELECT id FROM entr WHERE seq=%s AND src=1"
    elist, r = jdb.entrList(Cursor, sql, [seq], ret_tuple=1)
    # Report a missing entry as a test failure with a useful message
    # rather than as an IndexError on the next line.
    _.assertTrue(elist, "No entry found for seq %s" % seq)
    e1 = elist[0]
    jdb.augment_xrefs(Cursor, r['xref'])
    s = serialize.serialize(e1)
    e2 = serialize.unserialize(s)
    f1 = fmtxml.entr(e1)
    # Sanity check to detect empty entry.  (assertTrue is used because
    # the old 'assert_' alias no longer exists in Python 3.12.)
    _.assertTrue(len(f1) > 40)
    f2 = fmtxml.entr(e2)
    _.assertEqual(f1, f2)

Esempio n. 9

0

Mostra file

def _roundtrip(cur, lexer, parser, seq, src):
    # Helper function useful for testing.
    #
    # Reads the entry identified by 'seq' and 'src' from the database
    # opened on the dbapi cursor object 'cur', converts that entry to
    # a JEL text string, parses the text to get a new entry object,
    # and converts that entry object to JEL text again.
    #
    # Returns a 2-tuple: the text generated from the original object
    # and the text generated from the parser-produced object.  The two
    # should be identical.  Returns (None, None) if no entry is found.

    query = "SELECT id FROM entr WHERE seq=%s AND src=%s"
    found = jdb.entrList(cur, query, [seq, src])
    if not found:
        return None, None
    original = found[0]
    for sens in original._sens:
        jdb.augment_xrefs(cur, getattr(sens, '_xref', []))
    jeltxt = _get_jel_text(original)
    # Reset the lexer with the text before parsing that same text.
    jellex.lexreset(lexer, jeltxt)
    parsed = parser.parse(jeltxt, lexer=lexer, tracking=True)
    resolv_xrefs(cur, parsed)
    return jeltxt, _get_jel_text(parsed)

Esempio n. 10

0

Mostra file

File: test_fmtjel.py Progetto: gabriel4649/JMdictDB

 def check(self, seq):
     # Compare the JEL text generated by fmtjel for entry 'seq' with
     # the expected text stored in "data/fmtjel/<seq>.txt".
     #
     # seq -- Sequence number of the entry to check.
     global Cur, KW
     # Read expected text, remove any unicode BOM or trailing whitespace
     # that may have been added when editing.  The 'with' block makes
     # sure the file is closed again.
     with open("data/fmtjel/" + str(seq) + ".txt",
               encoding='utf-8') as f:
         expected = f.read().rstrip()
     # startswith() is safe on an empty string, unlike expected[0].
     if expected.startswith('\ufeff'): expected = expected[1:]
     # Read the entry from the database.  Be sure to get from the right
     # corpus and get only the currently active entry.  Assert that we
     # received exactly one entry.
     sql = "SELECT id FROM entr WHERE src=1 AND seq=%s AND stat=2 AND NOT unap"
     entrs, data = jdb.entrList(Cur, sql, (seq, ), ret_tuple=True)
     self.assertEqual(1, len(entrs))
     # Add the annotations needed for displaying xrefs in condensed form.
     jdb.augment_xrefs(Cur, data['xref'])
     jdb.augment_xrefs(Cur, data['xrer'], rev=True)
     fmtjel.markup_xrefs(Cur, data['xref'])
     # Test fmtjel by having it convert the entry to JEL.
     resulttxt = fmtjel.entr(entrs[0]).splitlines(True)
     # Confirm that the received text matched the expected text.  The
     # first line of the generated text is dropped before comparing.
     if resulttxt: resulttxt = ''.join(resulttxt[1:])
     # assertLess is used because the old 'assert_' alias no longer
     # exists in Python 3.12.
     self.assertLess(10, len(resulttxt))
     msg = "\nExpected:\n%s\nGot:\n%s" % (expected, resulttxt)
     self.assertEqual(expected, resulttxt, msg)

Esempio n. 11

0

Mostra file

def submission(dbh, entr, disp, errs, is_editor=False, userid=None):
    # Add a changed entry, 'entr', to the jmdictdb database accessed
    # by the open DBAPI cursor, 'dbh'.
    #
    # dbh -- An open DBAPI cursor
    # entr -- A populated Entr object that defines the entry to
    #   be added.  See below for description of how some of its
    #   attributes affect the submission.
    # disp -- Disposition, one of three string values:
    #   '' -- Submit as normal user.
    #   'a' -- Approve this submission.
    #   'r' -- Reject this submission.
    # errs -- A list to which any error messages will be appended.
    #   Note that if the error message contains html it should be
    #   wrapped in jmcgi.Markup() to prevent it from being escaped
    #   in the template.  Conversely, error messages that contain
    #   text from user input should NOT be so wrapped since they
    #   must be escaped in the template.
    # is_editor -- True if this submission is being performed by
    #   a logged in editor.  Approved or Rejected dispositions will
    #   fail if this is false.  Its value may be conveniently
    #   obtained from jmcgi.is_editor().  False if a normal user.
    # userid -- The userid if submitter is logged in editor or
    #   None if not.
    #
    # Returns the value returned by submit(), approve() or reject()
    # when no errors were recorded in 'errs', otherwise None.
    #
    # Note that we never modify existing database entries other
    # than to sometimes completely erase them.  Submissions
    # of all three types (submit, approve, reject) *always*
    # result in the creation of a new entry object in the database.
    # The new entry will be created by writing 'entr' to the
    # database.  The following attributes in 'entr' are relevant:
    #
    #   entr.dfrm -- If None, this is a new submission.  Otherwise,
    #       it must be the id number of the entry this submission
    #       is an edit of.
    #   entr.stat -- Must be consistent with changes requested. In
    #       particular, if it is 4 (Delete), changes made in 'entr'
    #       will be ignored, and a copy of the parent entry will be
    #       submitted with stat D.
    #   entr.src -- Required to be set, new entry will copy.
    #       # FIXME: prohibit non-editors from making src
    #       #  different than parent?
    #   entr.seq -- If set, will be copied.  If not set, submission
    #       will get a new seq number but this untested and very
    #       likely to break something.
    #       # FIXME: prohibit non-editors from making seq number
    #       #  different than parent, or non-null if no parent?
    #   entr.hist -- The last hist item on the entry will supply
    #       the comment, email and name fields to newly constructed
    #       comment that will replace it in the database.  The time-
    #       stamp and diff are regenerated and the userid field is
    #       set from our userid parameter.
    #       # FIXME: should pass history record explicitly so that
    #       #  we can be sure if the caller is or is not supplying
    #       #  one.  That will make it easier to use this function
    #       #  from other programs.
    # The following entry attributes need not be set:
    #   entr.id -- Ignored (reset to None).
    #   entr.unap -- Ignored (reset based on 'disp').
    # Additionally, if 'is_editor' is false, the rdng._freq and
    # kanj._freq items will be copied from the parent entr rather
    # than using the ones supplied  on 'entr'.  See jdb.copy_freqs()
    # for details about how the copy works when the rdng's or kanj's
    # differ between the parent and 'entr'.

    KW = jdb.KW
    L('cgi.edsubmit.submission').info(
        ("disp=%s, is_editor=%s, userid=%s, entry id=%s,\n" + " " * 36 +
         "parent=%s, stat=%s, unap=%s, seq=%s, src=%s") %
        (disp, is_editor, userid, entr.id, entr.dfrm, entr.stat, entr.unap,
         entr.seq, entr.src))
    L('cgi.edsubmit.submission').info("entry text: %s %s" %
                                      ((';'.join(k.txt for k in entr._kanj)),
                                       (';'.join(r.txt for r in entr._rdng))))
    L('cgi.edsubmit.submission').debug("seqset: %s" %
                                       logseq(dbh, entr.seq, entr.src))
    # Submissions, approvals and rejections will always produce a new
    # db entry object so nuke any id number.
    entr.id = None
    entr.unap = not disp
    merge_rev = False
    if not entr.dfrm:  # This is a submission of a new entry.
        entr.stat = KW.STAT['A'].id
        entr.seq = None  # Force addentr() to assign seq number.
        pentr = None  # No parent entr.
        edtree = None
    else:  # Modification of existing entry.
        edroot = get_edroot(dbh, entr.dfrm)
        edtree = get_subtree(dbh, edroot)
        # Get the parent entry and augment the xrefs so when hist diffs
        # are generated, they will show xref details.
        L('cgi.edsubmit.submission').debug("reading parent entry %d" %
                                           entr.dfrm)
        pentr, raw = jdb.entrList(dbh, None, [entr.dfrm], ret_tuple=True)
        if len(pentr) != 1:
            L('cgi.edsubmit.submission').debug("missing parent %d" % entr.dfrm)
            # The editset may have changed between the time our user
            # displayed the Confirmation screen and they clicked the
            # Submit button.  Changes involving unapproved edits result
            # in the addition of entries and don't alter the preexisting
            # tree shape.  Approvals of edits, deletes or rejects may
            # affect our subtree and if so will always manifest themselves
            # as the disappearance of our parent entry.
            #
            # The corpus is looked up from 'entr.src'.  (This used to
            # reference an undefined name, 'ent', which turned this
            # error report into a NameError.)
            errs.append(
                "The entry you are editing no loger exists because it "
                "was approved, deleted or rejected.  "
                "Please search for entry '%s' seq# %s and reenter your changes "
                "if they are still applicable." %
                (KW.SRC[entr.src].kw, entr.seq))
            return
        pentr = pentr[0]
        jdb.augment_xrefs(dbh, raw['xref'])

        if entr.stat == KW.STAT['D'].id:
            # If this is a deletion, set 'merge_rev'.  When passed
            # to jdb.add_hist() below it will tell it to return the
            # edited entry's parent, rather than the edited entry
            # itself.  The reason is that if we are doing a delete,
            # we do not want to make any changes to the entry, even
            # if the submitter has done so.
            merge_rev = True

    # Merge_hist() will combine the history entry in the submitted
    # entry with the all the previous history records in the
    # parent entry, so the the new entry will have a continuous
    # history.  In the process it checks that the parent entry
    # exists -- it might not if someone else has approved a
    # different edit in the meantime.
    # merge_hist also returns an entry.  If 'merge_rev' is false,
    # the entry returned is 'entr'.  If 'merge_rev' is true,
    # the entry returned is the entr pointed to by 'entr.dfrm'
    # (i.e. the original entry that the submitter edited.)
    # This is done when a delete is requested and we want to
    # ignore any edits the submitter may have made (which 'entr'
    # will contain.)

    # Before calling merge_hist() check for a condition that would
    # cause merge_hist() to fail.
    if entr.stat == KW.STAT['D'].id and not getattr(entr, 'dfrm', None):
        L('cgi.edsubmit.submission').debug("delete of new entry error")
        errs.append("Delete requested but this is a new entry.")

    if disp == 'a' and has_xrslv(entr) and entr.stat == KW.STAT['A'].id:
        L('cgi.edsubmit.submission').debug("unresolved xrefs error")
        errs.append("Can't approve because entry has unresolved xrefs")

    if not errs:
        # If this is a submission by a non-editor, restore the
        # original entry's freq items which non-editors are not
        # allowed to change.
        if not is_editor:
            if pentr:
                L('cgi.edsubmit.submission').debug("copying freqs from parent")
                jdb.copy_freqs(pentr, entr)
            # Note that non-editors can provide freq items on new
            # entries.  We expect an editor to vet this when approving.

        # Entr contains the hist record generate by the edconf.py
        # but it is not trustworthy since it could be modified or
        # created from scratch before we get it.  So we extract
        # the unvalidated info from it (name, email, notes, refs)
        # and recreate it.
        h = entr._hist[-1]
        # When we get here, if merge_rev is true, pentr will also be
        # true.  If we are wrong, add_hist() will throw an exception
        # but will never return a None, so no need to check return val.
        L('cgi.edsubmit.submission').debug("adding hist for '%s', merge=%s" %
                                           (h.name, merge_rev))
        entr = jdb.add_hist(entr, pentr, userid, h.name, h.email, h.notes,
                            h.refs, merge_rev)
    if not errs:
        # Occasionally, often from copy-pasting, a unicode BOM
        # character finds its way into one of an entry's text
        #  strings.  We quietly remove any here.
        n = jdb.bom_fixall(entr)
        if n > 0:
            L('cgi.edsubmit.submission').debug("removed %s BOM character(s)" %
                                               n)

    if not errs:
        if not disp:
            added = submit(dbh, entr, edtree, errs)
        elif disp == "a":
            added = approve(dbh, entr, edtree, errs)
        elif disp == "r":
            added = reject(dbh, entr, edtree, errs, None)
        else:
            L('cgi.edsubmit.submission').debug("bad url parameter (disp=%s)" %
                                               disp)
            errs.append("Bad url parameter (disp=%s)" % disp)
    L('cgi.edsubmit.submission').debug("seqset: %s" %
                                       logseq(dbh, entr.seq, entr.src))
    if not errs: return added
    # Note that changes have not been committed yet, caller is
    # expected to do that.
    return None

Esempio n. 12

0

Mostra file

File: jmcgi.py Progetto: cobysy/jmdictdb

def get_entrs(dbh, elist, qlist, errs, active=None, corpus=None):
    # Retrieve a set of Entr objects from the database, specified
    # by their entry id and/or seq numbers.
    #
    # dbh -- Open dbapi cursor to the current database.
    # elist -- List of id numbers of entries to get.  Each number
    #       may be either an integer or a string.
    # qlist -- List of seq numbers of entries to get.  Each seq
    #       number may be an integer or a string.  If the latter
    #       it may be followed by a period, and a corpus identifier
    #       which is either the corpus id number or the corpus name.
    # errs -- Must be a list (or other append()able object) to
    #       which any error messages will be appended.
    # active -- If 1, only active/approved or new/(unapproved)
    #       entries will be retrieved.
    #       If 2, at most one entry will be returned for each seq number
    #       in the results and that entry will be the most recently edited
    #       (chronologically based on history records) entry if one exists
    #       of the approved active entry.
    #       If active is any other value or not present, all entries
    #       meeting the entry-id, seq, or seq-corpus criteria will be
    #       retrieved.
    # corpus -- If not None, this is a corpus id number or name
    #       and will apply to any seq numbers without an explicit
    #       corpus given with the number.
    #
    # Returns the list of entries found; [] if 'corpus' is not
    # recognized; None if 'errs' is non-empty once the arguments have
    # been examined.
    #
    # If the same entry is specified more than once in 'elist' and/or
    # 'qlist' it will only occur once in the returned object list.
    # Objects in the returned list are in no particular order.

    id_args = []        # Values for the "id IN (...)" placeholders.
    seq_args = []       # Values for the seq / seq+src placeholders.
    extra_args = []     # Values for the 'active' restriction.
    conds = []          # WHERE terms that get OR'd together.
    corpid = None
    if corpus is not None:
        corpid = corp2id(corpus)
        if corpid is None:
            errs.append("Bad corpus parameter: %s" % corpus)
            return []

    for x in (elist or []):
        try:
            id_args.append(str2eid(str(x)))
        except ValueError:
            errs.append("Bad url parameter received: " + esc(x))
    if id_args:
        placeholders = ','.join(['%s'] * len(id_args))
        conds.append("id IN (" + placeholders + ")")

    for x in (qlist or []):
        try:
            seq_corp = list(str2seq(str(x)))
        except ValueError:
            errs.append("Bad parameter received: " + esc(x))
            continue
        # Apply the default corpus to a seq number given without one.
        if corpus and not seq_corp[1]:
            seq_corp[1] = corpid
        if seq_corp[1]:
            conds.append("(seq=%s AND src=%s)")
            seq_args.extend(seq_corp)
        else:
            conds.append("seq=%s")
            seq_args.append(seq_corp[0])

    if not conds:
        errs.append("No valid entry or seq numbers given.")
    if errs:
        return None

    # 'active' may arrive as a string; anything not convertible to an
    # int is left alone and simply matches neither case below.
    try:
        active = int(active)
    except (ValueError, TypeError):
        pass

    extra_where = distinct = hist_join = order_by = ''
    if active == 1:
        # Following will restrict returned rows to active/approved
        # (stat=A and not unap) or new (dfrm is NULL), that is, the
        # result set will not include any stat=D or stat=R results.
        extra_where = " AND stat=%s AND (NOT unap OR dfrm IS NULL)"
        extra_args.append(jdb.KW.STAT['A'].id)
    elif active == 2:
        # Restrict returned rows to active (no stat=D or stat=R results)
        # and most recent edit as determined by the history records (if any).
        # In no event will more than one entry per seq number be returned.
        # Note that this will necessarily return the edit from only one
        # branch when multiple branches exist which may result in surprise
        # for a user when the returned entry shows no signs of a recent
        # edit known to have been made.
        # Example of generated sql:
        # SELECT DISTINCT ON (e.seq) e.id FROM entr e LEFT JOIN hist h ON h.entr=e.id
        #  WHERE e.seq=2626330 and e.stat=2 ORDER BY e.seq,h.dt DESC NULLS LAST;
        extra_where = " AND e.stat=%s"
        extra_args.append(jdb.KW.STAT['A'].id)
        distinct = " DISTINCT ON (e.seq)"
        hist_join = " LEFT JOIN hist h ON h.entr=e.id"
        # "NULLS LAST" is needed below because some entries (e.g., entries
        # imported when JMdictDB is first initialized and never edited)
        # may not have history records which will result in 'dt' values of
        # NULL; we want those entries last.
        order_by = " ORDER BY e.seq,h.dt DESC NULLS LAST"

    # The argument list below must follow the placeholder order of the
    # assembled statement: id terms, then seq terms, then 'active'.
    sql = ''.join([
        "SELECT", distinct, " e.id FROM entr e ", hist_join,
        " WHERE (", " OR ".join(conds), ")", extra_where, order_by,
    ])
    sqlargs = id_args + seq_args + extra_args
    entries, raw = jdb.entrList(dbh, sql, sqlargs, ret_tuple=True)
    if entries:
        forward = raw['xref']
        jdb.augment_xrefs(dbh, forward)
        jdb.augment_xrefs(dbh, raw['xrer'], rev=1)
        jdb.add_xsens_lists(forward)
        jdb.mark_seq_xrefs(dbh, forward)
    return entries

Esempio n. 13

0

Mostra file

File: edconf.py Progetto: gabriel4649/JMdictDB

def main(args, opts):
    # CGI entry point for the edit confirmation page: build an Entr
    # object from the submitted edit form, check it, and render it
    # with the "edconf.jinja" template.
    #
    # args, opts -- Not used by this function.
    #
    # NOTE(review): the code below is written as though
    # jmcgi.err_page() does not return -- confirm in jmcgi.
    jdb.reset_encoding(sys.stdout, 'utf-8')
    errs = []
    chklist = {}
    try:
        form, svc, dbg, cur, sid, sess, parms, cfg = jmcgi.parseform()
    except Exception as e:
        jmcgi.err_page([str(e)])

    fv = form.getfirst
    fl = form.getlist
    KW = jdb.KW

    # 'eid' will be an integer if we are editing an existing
    # entry, or undefined if this is a new entry.
    pentr = None
    eid = url_int('id', form, errs)
    if eid:
        # Get the parent entry of the edited entry.  This is what the
        # edited entry will be diff'd against for the history record.
        # It is also the entry that will be pointed to by the edited
        # entry's 'dfrm' field.
        pentr = jdb.entrList(cur, None, [eid])
        #FIXME: Need a better message with more explanation.
        if not pentr:
            errs.append("The entry you are editing has been deleted.")
        else:
            pentr = pentr[0]

    # Desired disposition: 'a':approve, 'r':reject, undef:submit.
    disp = url_str('disp', form)
    if disp != 'a' and disp != 'r' and disp != '' and disp is not None:
        errs.append("Invalid 'disp' parameter: '%s'" % disp)

    # New status is A for edit of existing or new entry, D for
    # deletion of existing entry.
    delete = fv('delete')
    makecopy = fv('makecopy')
    if delete and makecopy:
        errs.append(
            "The 'delete' and 'treat as new'"
            " checkboxes are mutually exclusive; please select only one.")
    # A delete needs a parent entry to restore the original values
    # from (see the 'if delete:' branch below); without one that
    # branch would fail on 'pentr.seq'.  Report it as an ordinary
    # error instead, unless an earlier error already explains why
    # there is no parent.
    if delete and not pentr and not errs:
        errs.append("Delete requested but this is a new entry.")
    if makecopy: eid = None
    # FIXME: we need to disallow new entries with corp.seq
    # that matches an existing A, A*, R*, D*, D? entry.
    # Do same check in submit.py.

    seq = url_int('seq', form, errs)
    src = url_int('src', form, errs)
    notes = url_str('notes', form)
    srcnote = url_str('srcnote', form)

    # These are the JEL (JMdict Edit Language) texts which
    # we will concatenate into a string that is fed to the
    # JEL parser which will create an Entr object.
    kanj = (stripws(url_str('kanj', form))).strip()
    rdng = (stripws(url_str('rdng', form))).strip()
    sens = (url_str('sens', form)).strip()
    intxt = "\f".join((kanj, rdng, sens))
    grpstxt = url_str('grp', form)

    # Get the meta-edit info which will go into the history
    # record for this change.
    comment = url_str('comment', form)
    refs = url_str('reference', form)
    name = url_str('name', form)
    email = url_str('email', form)

    if errs: jmcgi.err_page(errs)

    # Parse the entry data.  Problems will be reported
    # by messages in 'perrs'.  We do the parse even if
    # the request is to delete the entry (is this right
    # thing to do???) since on the edconf page we want
    # to display what the entry was.  The edsubmit page
    # will do the actual deletion.

    entr, errs = parse(intxt)
    # 'errs' is a list which if not empty has a single item
    # which is a 2-seq of str's: (error-type, error-message).
    if errs or not entr:
        if not entr and not errs:
            # Must have the same shape as the errors from parse(): a
            # list holding one (error-type, error-message) pair, since
            # it is indexed as errs[0][0] and errs[0][1] just below.
            # The empty error-type value is the one the original code
            # used.
            errs = [([], "Unable to create an entry from your input.")]
        jmcgi.err_page([errs[0][1]], prolog=errs[0][0], cssclass="errormsg")

    entr.dfrm = eid
    entr.unap = not disp

    # To display the xrefs and reverse xrefs in html, they
    # need to be augmented with additional info about their
    # targets.  collect_refs() simply returns a list Xref
    # objects that are on the entr argument's .xref list
    # (forward xrefs) if rev not true, or the Xref objects
    # on the entr argument's ._xrer list (reverse xrefs) if
    # rev is true).  This does not remove them from the entry
    # and is done simply for convenience so we can have
    # augment_xrefs() process them all in one shot.
    # augment_xrefs add an attribute, .TARG, to each Xref
    # object whose value is an Entr object for the entry the
    # xref points to if rev is not true, or the entry the xref
    # is from, if rev is true.  These Entr objects can be used
    # to display info about the xref target or source such as
    # seq#, reading or kanji.  See jdb.augment_xrefs() for details.
    # Note that <xrefs> and <xrers> below contain references
    # to the xrefs on the entries; thus the augmentation done
    # by jdb.augment_xrefs() alters the xref objects on those
    # entries.
    if pentr:
        x = jdb.collect_xrefs([pentr])
        if x: jdb.augment_xrefs(cur, x)
        # Although we don't allow editing of an entry's reverse
        # xref, we still augment them (on the parent entry)
        # because we will display them.
        x = jdb.collect_xrefs([pentr], rev=True)
        if x: jdb.augment_xrefs(cur, x, rev=True)
    x = jdb.collect_xrefs([entr])
    if x: jdb.augment_xrefs(cur, x)

    if delete:
        # Ignore any content changes made by the submitter by
        # restoring original values to the new entry.
        entr.seq = pentr.seq
        entr.src = pentr.src
        entr.stat = KW.STAT['D'].id
        entr.notes = pentr.notes
        entr.srcnote = pentr.srcnote
        entr._kanj = getattr(pentr, '_kanj', [])
        entr._rdng = getattr(pentr, '_rdng', [])
        entr._sens = getattr(pentr, '_sens', [])
        entr._snd = getattr(pentr, '_snd', [])
        entr._grp = getattr(pentr, '_grp', [])
        entr._cinf = getattr(pentr, '_cinf', [])

    else:
        # Migrate the entr details to the new entr object
        # which to this point has only the kanj/rdng/sens
        # info provided by jbparser.
        entr.seq = seq
        entr.src = src
        entr.stat = KW.STAT['A'].id
        entr.notes = notes
        entr.srcnote = srcnote
        entr._grp = jelparse.parse_grp(grpstxt)

        # This form and the JEL parser provide no way to change
        # some entry attributes such _cinf, _snd, reverse xrefs
        # and for non-editors, _freq.  We need to copy these items
        # from the original entry to the new, edited entry to avoid
        # losing them.  The copy can be shallow since we won't be
        # changing the copied content.
        if pentr:
            if not jmcgi.is_editor(sess):
                jdb.copy_freqs(pentr, entr)
            if hasattr(pentr, '_cinf'): entr._cinf = pentr._cinf
            copy_snd(pentr, entr)

            # Copy the reverse xrefs that are on pentr to entr,
            # removing any that are no longer valid because they
            # refer to senses , readings or kanji no longer present
            # on the edited entry.  Note that these have already
            # been augmented above.
            nuked_xrers = realign_xrers(entr, pentr)
            if nuked_xrers:
                chklist['xrers'] = format_for_warnings(nuked_xrers, pentr)

        # Add sound details so confirm page will look the same as the
        # original entry page.  Otherwise, the confirm page will display
        # only the sound clip id(s).
        #FIXME? Should the following snd augmentation stuff be outdented
        # one level so that it is done in both the delete and non-delete
        # paths?
        snds = []
        for s in getattr(entr, '_snd', []):
            snds.append(s)
        for r in getattr(entr, '_rdng', []):
            for s in getattr(r, '_snd', []):
                snds.append(s)
        if snds: jdb.augment_snds(cur, snds)

        # If any xrefs were given, resolve them to actual entries
        # here since that is the form used to store them in the
        # database.  If any are unresolvable, an appropriate error
        # is saved and will reported later.

        rslv_errs = jelparse.resolv_xrefs(cur, entr)
        if rslv_errs: chklist['xrslv'] = rslv_errs

    if errs: jmcgi.err_page(errs)

    # Append a new hist record details this edit.
    if not hasattr(entr, '_hist'): entr._hist = []
    entr = jdb.add_hist(entr, pentr, sess.userid if sess else None, name,
                        email, comment, refs, entr.stat == KW.STAT['D'].id)
    if not delete:
        check_for_errors(entr, errs)
        if errs: jmcgi.err_page(errs)
        pseq = pentr.seq if pentr else None
        check_for_warnings(cur, entr, pseq, chklist)

    # The following all expect a list of entries.
    jmcgi.add_filtered_xrefs([entr], rem_unap=False)
    serialized = serialize.serialize([entr])
    jmcgi.htmlprep([entr])

    entrs = [[entr, None]]  # Package 'entr' as expected by entr.jinja.
    jmcgi.jinja_page("edconf.jinja",
                     entries=entrs,
                     serialized=serialized,
                     chklist=chklist,
                     disp=disp,
                     parms=parms,
                     svc=svc,
                     dbg=dbg,
                     sid=sid,
                     session=sess,
                     cfg=cfg,
                     this_page='edconf.py')

Esempio n. 14

0

Mostra file

File: entrs2xml.py Progetto: cobysy/jmdictdb

def main(args, opts):
    """Read entries from the database and write them out as XML.

    args -- Positional command line arguments.  If empty, output is
        written to sys.stdout; otherwise args[0] is the name of the
        output file to create.
    opts -- Parsed command line options.  The attributes read here
        are: debug, database, root, compat, nodtd, encoding, seqfile,
        corpus, begin, count and blocksize.  'opts' is also passed on
        to jdb.dbopts() and write_entrs().  Note that opts.root is
        filled in with a default when it was not supplied.

    Entries are read in blocks of opts.blocksize and handed to
    write_entrs().  Progress and diagnostics go to sys.stderr.

    Exits via sys.exit(1) when no starting entry can be found (either
    the "--begin" seq number does not exist or, when neither "--begin"
    nor "--seqfile" was given, the selected corpora contain no
    matching entries).
    """
    global Debug
    Debug = opts.debug
    # Open the database.  jdb.dbopts() extracts the db-related
    # options from the command line options in 'opts'.
    cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))

    # If no "--root" option was supplied, choose a default based
    # on the value of the "--compat" option.
    if not opts.root:
        if opts.compat in ('jmnedict', 'jmneold'): opts.root = 'JMnedict'
        else: opts.root = 'JMdict'

    outf = None
    # True only when we open()'d the output file ourselves; used to
    # decide whether it is ours to close at the end.
    opened_outf = False
    if not opts.nodtd:
        # Choose a dtd to use based on the "--compat" option.
        # The dtd file is expected to be located somewhere in the
        # pythonpath (sys.path) directories.
        dtd_by_compat = {
            'jmdict': "dtd-jmdict.xml",
            'jmdicthist': "dtd-jmdict.xml",
            'jmnedict': "dtd-jmnedict.xml",
            'jmneold': "dtd-jmneold.xml",
        }
        dtd = dtd_by_compat.get(opts.compat, "dtd-jmdict-ex.xml")
        dtddir = jdb.find_in_syspath(dtd)
        dtdfn = dtddir + "/" + dtd  # Fully qualified dtd file name.

        # jdb.get_dtd() reads the dtd text, and replaces the root
        # element name and encoding with the values supplied
        # in the arguments.
        dtdtxt = jdb.get_dtd(dtdfn, opts.root, opts.encoding)
        if not args: outf = sys.stdout
        else:
            outf = open(args[0], "w")
            opened_outf = True
        jdb.reset_encoding(outf, opts.encoding)
        outf.write(dtdtxt)

    if opts.seqfile:
        seqf = sys.stdin if opts.seqfile == '-' else open(opts.seqfile)
        try:
            #FIXME: we should read these incrementally.
            # seq numbers are separated by spaces or newlines.
            entrlist = [int(x) for x in seqf.read().split()]
        finally:
            # Close the file even if a token failed to parse as an int,
            # but never close stdin.
            if seqf is not sys.stdin: seqf.close()

    # Turn the "--corpus" option value into a string that can be
    # and'ed into a SQL WHERE clause to restrict the results to
    # the specified corpora.
    corp_terms = parse_corpus_opt(opts.corpus, 'e.src')

    # If the output file was not opened in the dtd section
    # above, open it now.  We postpone opening it until the
    # last possible moment to avoid creating it and then
    # bombing because there was a typo in the input or dtd
    # filename, etc.
    # FIXME: Should do a "write" function that opens the
    #  file just before writing.
    # NOTE(review): in this (--nodtd) path the stream is not passed
    #  through jdb.reset_encoding(), so opts.encoding is not applied
    #  to it -- confirm whether that is intended.
    if not outf:
        if not args: outf = sys.stdout
        else:
            outf = open(args[0], "w")
            opened_outf = True

    # If compat (jmdict or jmnedict), restrict the xml to Active,
    # approved entries only.
    whr_act = " AND NOT unap AND stat=" + str(
        jdb.KW.STAT['A'].id) if opts.compat else ""
    if opts.begin:
        # If a "--begin" sequence number was given, we need to read
        # the entr record so we can get the src id number.  Complain
        # and exit if not found.  Complain if more than one entry
        # with the requested seq number exists.  More than one may be
        # found since the same sequence number may exist in different
        # corpora, or in the same corpus if an entry was edited.
        #
        #FIXME: no way to select from multiple entries with same seq
        # number.  Might want just the stat="A" entries for example.
        sql = "SELECT id,seq,src FROM entr e WHERE seq=%s%s%s ORDER BY src" \
                % (int(opts.begin), corp_terms, whr_act)
        if Debug: print(sql, file=sys.stderr)
        start = time.time()
        rs = jdb.dbread(cur, sql)
        if Debug:
            print("Time: %s (init read)" % (time.time() - start),
                  file=sys.stderr)
        if not rs:
            print ("No entry with seq '%s' found" \
                                 % opts.begin, file=sys.stderr)
            sys.exit(1)
        if len(rs) > 1:
            print ("Multiple entries having seq '%s' found, results " \
                   "may not be as expected.  Consider using -s to " \
                   "restrict to a single corpus." % (opts.begin), file=sys.stderr)
        lastsrc, lastseq, lastid = rs[0].src, rs[0].seq, rs[0].id
    if not opts.begin and not opts.seqfile:
        # If no "--begin" option, remove the " AND" from the front of
        # the 'corp_terms' string.  Read the first entry (by seq number)
        # in the requested corpora.
        cc = corp_terms[4:] if corp_terms else 'True'
        sql = "SELECT id,seq,src FROM entr e WHERE %s%s ORDER BY src,seq LIMIT 1" % (
            cc, whr_act)
        start = time.time()
        if Debug: print(sql, file=sys.stderr)
        rs = jdb.dbread(cur, sql)
        if Debug:
            print("Time: %s (init read)" % (time.time() - start),
                  file=sys.stderr)
        if not rs:
            # Nothing to start from (e.g. empty corpus); report it the
            # same way the "--begin" path does instead of failing with
            # an IndexError on rs[0].
            print("No entries found", file=sys.stderr)
            sys.exit(1)
        lastsrc, lastseq, lastid = rs[0].src, rs[0].seq, rs[0].id

    # Add an enclosing root element only if we are also including
    # a DTD (ie, producing a full XML file).  Otherwise, the file
    # generated will just be a list of <entr> elements.
    if not opts.nodtd:
        if opts.compat:  # Add a date comment...
            today = time.strftime("%Y-%m-%d", time.localtime())
            outf.write("<!-- %s created: %s -->\n" % (opts.root, today))
        outf.write('<%s>\n' % opts.root)

    entrlist_loc = 0
    count = opts.count
    done = 0
    blksize = opts.blocksize
    corpora = set()

    while count is None or count > 0:

        if opts.seqfile:
            # NOTE(review): 'count' does not limit the size of a block
            #  in this path, so more than 'count' entries can be
            #  written when reading seq numbers from a file.
            seqnums = tuple(entrlist[entrlist_loc:entrlist_loc + blksize])
            if not seqnums: break
            entrlist_loc += blksize
            #FIXME: need detection of non-existent seq#s.
            sql = "SELECT id FROM entr e WHERE seq IN %s" + corp_terms + whr_act
            sql_args = [seqnums]
            if Debug: print(sql, sql_args, file=sys.stderr)
            start = time.time()
            tmptbl = jdb.entrFind(cur, sql, sql_args)
        else:
            # In this loop we read blocks of 'blksize' entries.  Each
            # block read is ordered by entr src (i.e. corpus), seq, and
            # id.  The block to read is specified in WHERE clause which
            # is effectively:
            #   WHERE ((e.src=lastsrc AND e.seq=lastseq AND e.id>=lastid)
            #           OR (e.src=lastsrc AND e.seq>lastseq)
            #           OR e.src>lastsrc)
            # where (lastsrc, lastseq) are from the last entry in the
            # last block read and lastid is that entry's id plus one.

            whr = "WHERE ((e.src=%%s AND e.seq=%%s AND e.id>=%%s) " \
                          "OR (e.src=%%s AND e.seq>%%s) " \
                          "OR e.src>%%s) %s%s" % (corp_terms, whr_act)
            sql = "SELECT e.id FROM entr e" \
                  " %s ORDER BY src,seq,id LIMIT %d" \
                   % (whr, blksize if count is None else min (blksize, count))

            # The following args will be substituted for the "%s"
            # placeholders (written "%%s" above) in jdb.entrFind().
            sql_args = [lastsrc, lastseq, lastid, lastsrc, lastseq, lastsrc]

            # Create a temporary table of id numbers and give that to
            # jdb.entrList().  This is an order of magnitude faster than
            # giving the above sql directly to entrList().
            if Debug: print(sql, sql_args, file=sys.stderr)
            start = time.time()
            tmptbl = jdb.entrFind(cur, sql, sql_args)
        mid = time.time()
        entrs, raw = jdb.entrList(cur,
                                  tmptbl,
                                  None,
                                  ord="src,seq,id",
                                  ret_tuple=True)
        end = time.time()
        if Debug:
            print("read %d entries" % len(entrs), file=sys.stderr)
            print("Time: %s (entrFind), %s (entrList)" %
                  (mid - start, end - mid),
                  file=sys.stderr)
        if not entrs: break
        write_entrs(cur, entrs, raw, corpora, opts, outf)

        # Update the 'last*' variables for the next time through
        # the loop.  Also, decrement 'count', if we are counting.
        lastsrc = entrs[-1].src
        lastseq = entrs[-1].seq
        lastid = entrs[-1].id + 1
        if count is not None: count -= blksize
        done += len(entrs)
        if not Debug: sys.stderr.write('.')
        else: print("%d entries written" % done, file=sys.stderr)
    # Close the root element opened above (write(), not writelines():
    # the argument is a single string, not a sequence of lines).
    if not opts.nodtd: outf.write('</%s>\n' % opts.root)
    if not Debug: sys.stderr.write('\n')
    print("Wrote %d entries" % done, file=sys.stderr)
    # Release the output file if we opened it; leave sys.stdout alone.
    if opened_outf: outf.close()