Exemple #1
0
def main(args, opts):
    # Program entry point: find the entries selected by the command
    # line arguments and options and print them to stdout.
    #
    # args -- Positional command line arguments (handed to opts2sql()).
    # opts -- Parsed command line options.  The attributes read here
    #   are .database, .encoding, .debug and .jel; the whole object
    #   is also passed to jdb.dbopts() and opts2sql().

    # Open the database and get the "cursor" used for all subsequent
    # database operations.  As a side-effect this call also creates
    # a global variable named 'KW' in module 'jdb' that holds the data
    # read from all the keyword database tables (tables with names
    # matching the pattern "kw*").  Reading that data once at program
    # startup avoids multiple high-cost trips to the database later.
    try:
        cur = jdb.dbOpen(opts.database, **jdb.dbopts(opts))
    except jdb.dbapi.OperationalError as err:
        print("Error, unable to connect to database, do you need -u or -p?\n",
              str(err),
              file=sys.stderr)
        sys.exit(1)

    # Output encoding: explicit option first, then whatever stdout
    # reports, then utf-8 as the last resort.
    jdb.reset_encoding(
        sys.stdout,
        encoding=opts.encoding or sys.stdout.encoding or 'utf-8')

    # Convert the command line options into a sql statement (plus
    # its arguments) that will find the desired entries.
    sql, sqlargs = opts2sql(args, opts)
    if opts.debug:
        print("%s  %s" % (sql, repr(sqlargs)))

    # Retrieve the entries from the database.  'entrs' is a list of
    # entry objects; 'raw' is a dictionary keyed by table name whose
    # values are all the rows retrieved from that table.
    entrs, raw = jdb.entrList(cur, sql, sqlargs, ret_tuple=True)

    # The xrefs in the retrieved entries hold only the entry id
    # numbers of the referenced entries.  To be able to show the
    # referenced entry's kanji, glosses, etc. we call augment_xrefs()
    # to fetch that extra information.  Same for reverse references.
    fwd_xrefs = raw['xref']
    jdb.augment_xrefs(cur, fwd_xrefs)
    jdb.augment_xrefs(cur, raw['xrer'], rev=1)
    jdb.add_xsens_lists(fwd_xrefs)
    jdb.mark_seq_xrefs(cur, fwd_xrefs)

    # Print the entries, separated from each other by a blank line.
    for position, entry in enumerate(entrs):
        # Pick the formatter for the kind of output the user requested.
        render = fmtjel.entr if opts.jel else fmt.entr
        txt = render(entry)
        if position > 0:
            print()
        print(txt)

    if len(entrs) == 0:
        print("No entries found")
Exemple #2
0
def roundtrip(cur, intxt):
    # Parse the text 'intxt' into an entry object, resolve and
    # post-process its xrefs using 'cur', and return the text that
    # fmtjel.entr() produces for the resulting entry (nohdr=True).
    #
    # cur -- Passed to the jelparse/jdb xref functions; presumably an
    #   open database cursor.
    # intxt -- Text to be parsed by Parser.parse().

    jellex.lexreset(Lexer, intxt)
    parsed = Parser.parse(intxt, lexer=Lexer)
    parsed.src = 1
    jelparse.resolv_xrefs(cur, parsed)

    def sense_xrefs():
        # Yield each sense's xref list (an empty list when the
        # sense has no '_xref' attribute).
        for sens in parsed._sens:
            yield getattr(sens, '_xref', [])

    # Each pass is completed for every sense before the next pass
    # begins.
    for xlist in sense_xrefs():
        jdb.augment_xrefs(cur, xlist)
    for xlist in sense_xrefs():
        jdb.add_xsens_lists(xlist)
    for xlist in sense_xrefs():
        jdb.mark_seq_xrefs(cur, xlist)

    return fmtjel.entr(parsed, nohdr=True)
Exemple #3
0
def roundtrip(cur, intxt):
    # Parse the JEL text 'intxt' into an entry object, resolve and
    # post-process its xrefs using 'cur', and return the text that
    # fmtjel.entr() produces for the resulting entry (nohdr=True).
    #
    # cur -- Passed to the jelparse/jdb xref functions; presumably an
    #   open database cursor.
    # intxt -- Test data text; may still use the old '\n' section
    #   separators (see below).

    # Since hg-180523-6b1a12 the kanji, reading and senses sections
    # of JEL text given to jelparse() are separated by '\f' rather
    # than the '\n' used previously.  Instead of changing all the
    # test data that still uses '\n', secsepfix() is called to replace
    # the first two '\n's in the test data with '\f's, making it
    # suitable for parsing.
    fixed_txt = secsepfix(intxt)
    jellex.lexreset(Lexer, fixed_txt)
    parsed = Parser.parse(fixed_txt, lexer=Lexer)
    parsed.src = 1
    jelparse.resolv_xrefs(cur, parsed)

    def sense_xrefs():
        # Yield each sense's xref list (an empty list when the
        # sense has no '_xref' attribute).
        for sens in parsed._sens:
            yield getattr(sens, '_xref', [])

    # Each pass is completed for every sense before the next pass
    # begins.
    for xlist in sense_xrefs():
        jdb.augment_xrefs(cur, xlist)
    for xlist in sense_xrefs():
        jdb.add_xsens_lists(xlist)
    for xlist in sense_xrefs():
        jdb.mark_seq_xrefs(cur, xlist)

    return fmtjel.entr(parsed, nohdr=True)
Exemple #4
0
def xrefs(xrefs, src):
    # Generate xml for xrefs.  If there is an xref to every
    # sense of a target entry, then we generate a single
    # xref element without a sense number.  Otherwise we
    # generate an xref element with sense number for each
    # target sense.
    #
    # xrefs -- A list of xref objects to be formatted.  The
    #   xrefs must have an augmented target attribute (as
    #   produced by calling augment_xrefs()) or an
    #   AttributeError will be raised.
    #
    # src -- Corpus id number of the entry that contains
    #   the xrefs.
    #   If 'src' is true, enhanced XML will be generated.
    #   If not, legacy JMdict XML will be generated.
    #
    # Returns a list of formatted xref strings.

    # NOTE: the result list is deliberately not named 'fmt' so that
    # it cannot shadow a module of that name.
    formatted = []

    # Mark each xref that differs only by .xsens value with
    # a ._xsens attribute that will be a list of all .xsens
    # values on the first such xref, and an empty list on
    # subsequent such xrefs.
    jdb.add_xsens_lists(xrefs)

    for x in xrefs:
        # If ._xsens is empty, this xref can be ignored since
        # we already formatted a preceding matching xref that
        # contained a list of all .xsens values.
        if not x._xsens: continue

        # Check that augment_xrefs() was called on this
        # xref.  The target object is needed because it has
        # the actual kanji and reading texts that will be
        # used in the xml xref, as well as the number of
        # senses, which we also need.
        try:
            targ = x.TARG
        except AttributeError as err:
            raise AttributeError(
                "xref missing TARG attribute.  Did you forget to call augment_xrefs()?"
            ) from err

        # If generating JMdict-compatible XML, don't generate
        # xrefs to entries that are unapproved or whose status
        # is not active (i.e. deleted or rejected.)
        if not src and (targ.unap or targ.stat != jdb.KW.STAT['A'].id):
            continue

        # Format the xref into xml text.
        fmtdxref = xref(x, src)

        # We can assume that, since the database RI constraints
        # won't allow two xrefs in the same source to point to
        # the same target and sense, if the number of xsens values
        # in the ._xsens list equals the number of target senses,
        # there is one xref pointing to each sense.
        if len(targ._sens) != len(x._xsens):
            # There is not an xref for each target sense, so we
            # want to generate xrefs with explicit target senses.
            for s in x._xsens:
                # The string returned by xref() has a "%s"
                # placeholder for the sense number.  Generate
                # an xref element with sense for each xref in
                # the group.  \u30FB is mid-height dot.
                # The sense suffix is built first and substituted
                # in a single pass; formatting the xref text twice
                # would misinterpret any literal percent sign left
                # in it by the first pass.
                formatted.append(fmtdxref % ('\u30FB%d' % s))
        else:
            # There is an xref for each target sense so we want
            # to suppress the target sense numbers.
            formatted.append(fmtdxref % '')
    return formatted
Exemple #5
0
def markup_xrefs(cur, xrefs):
    # Run the two 'jdb' xref annotation passes on 'xrefs', in this
    # order: add_xsens_lists() and then mark_seq_xrefs().
    #
    # cur -- Passed through to jdb.mark_seq_xrefs(); presumably an
    #   open database cursor -- confirm against callers.
    # xrefs -- The list of xref objects handed to both calls.
    #
    # Nothing is returned; presumably both calls work by annotating
    # the xref objects in place (TODO confirm in jdb).
    jdb.add_xsens_lists(xrefs)
    jdb.mark_seq_xrefs(cur, xrefs)
Exemple #6
0
def get_entrs(dbh, elist, qlist, errs, active=None, corpus=None):
    # Retrieve a set of Entr objects from the database, specified
    # by their entry id and/or seq numbers.
    #
    # dbh -- Open dbapi cursor to the current database.
    # elist -- List of id numbers of entries to get.  Each number
    #       may be either an integer or a string.
    # qlist -- List of seq numbers of entries to get.  Each seq
    #       number may be an integer or a string.  If the latter
    #       it may be followed by a period, and a corpus identifier
    #       which is either the corpus id number or the corpus name.
    # errs -- Must be a list (or other append()able object) to
    #       which any error messages will be appended.
    # active -- If 1, only active/approved or new (unapproved)
    #       entries will be retrieved.
    #       If 2, at most one entry will be returned for each seq
    #       number in the results: the active entry with the most
    #       recent history record, entries with no history records
    #       being considered last.
    #       If active is any other value or not present, all entries
    #       meeting the entry-id, seq, or seq-corpus criteria will be
    #       retrieved.
    # corpus -- If not None, this is a corpus id number or name
    #       and will apply to any seq numbers without an explicit
    #       corpus given with the number.
    #
    # Returns the list of entry objects found.  An empty list is
    # returned if 'corpus' is not recognized, and None is returned
    # (without querying the database) if 'errs' is non-empty after
    # the arguments have been checked -- that includes any messages
    # already in 'errs' when the function was called.
    #
    # If the same entry is specified more than once in 'elist' and/or
    # 'qlist' it will only occur once in the returned object list.
    # Objects in the returned list are in no particular order.

    id_params, seq_params, extra_params = [], [], []
    conditions = []

    corpid = None
    if corpus is not None:
        corpid = corp2id(corpus)
        if corpid is None:
            errs.append("Bad corpus parameter: %s" % corpus)
            return []

    # Entry id numbers all go into a single "IN" test.
    for item in (elist or []):
        try:
            id_params.append(str2eid(str(item)))
        except ValueError:
            errs.append("Bad url parameter received: " + esc(item))
    if id_params:
        placeholders = ','.join('%s' for _ in id_params)
        conditions.append("id IN (" + placeholders + ")")

    # Each seq number gets a test of its own, qualified by corpus
    # when one was given with the number or through 'corpus'.
    for item in (qlist or []):
        try:
            seq_and_corp = list(str2seq(str(item)))
        except ValueError:
            errs.append("Bad parameter received: " + esc(item))
            continue
        if corpus and not seq_and_corp[1]:
            seq_and_corp[1] = corpid
        if seq_and_corp[1]:
            conditions.append("(seq=%s AND src=%s)")
            seq_params.extend(seq_and_corp)
        else:
            conditions.append("seq=%s")
            seq_params.append(seq_and_corp[0])

    if not conditions:
        errs.append("No valid entry or seq numbers given.")
    if errs:
        return None

    # 'active' may arrive as a string; anything that is not
    # convertible to an int is left alone and matches neither mode.
    try:
        active = int(active)
    except (ValueError, TypeError):
        pass

    status_filter = distinct = hist_join = ordering = ''
    if active == 1:
        # Following will restrict returned rows to active/approved
        # (stat=A and not unap) or new (dfrm is NULL), that is, the
        # result set will not include any stat=D or stat=R results.
        status_filter = " AND stat=%s AND (NOT unap OR dfrm IS NULL)"
        extra_params.append(jdb.KW.STAT['A'].id)
    elif active == 2:
        # Restrict returned rows to active (no stat=D or stat=R
        # results) and most recent edit as determined by the history
        # records (if any).  In no event will more than one entry per
        # seq number be returned.  Note that this will necessarily
        # return the edit from only one branch when multiple branches
        # exist, which may surprise a user when the returned entry
        # shows no signs of a recent edit known to have been made.
        # Example of generated sql:
        # SELECT DISTINCT ON (e.seq) e.id FROM entr e LEFT JOIN hist h ON h.entr=e.id
        #  WHERE e.seq=2626330 and e.stat=2 ORDER BY e.seq,h.dt DESC NULLS LAST;
        status_filter = " AND e.stat=%s"
        extra_params.append(jdb.KW.STAT['A'].id)
        distinct = " DISTINCT ON (e.seq)"
        hist_join = " LEFT JOIN hist h ON h.entr=e.id"
        # "NULLS LAST" is needed because some entries (e.g., entries
        # imported when JMdictDB is first initialized and never
        # edited) may not have history records, which results in 'dt'
        # values of NULL; we want those entries last.
        ordering = " ORDER BY e.seq,h.dt DESC NULLS LAST"

    # The placeholders appear in the order: ids, seqs, status filter,
    # which is the order the argument lists are concatenated below.
    sql = ''.join([
        "SELECT", distinct, " e.id FROM entr e ", hist_join,
        " WHERE (", " OR ".join(conditions), ")",
        status_filter, ordering,
    ])
    entries, raw = jdb.entrList(dbh,
                                sql,
                                id_params + seq_params + extra_params,
                                ret_tuple=True)
    if entries:
        jdb.augment_xrefs(dbh, raw['xref'])
        jdb.augment_xrefs(dbh, raw['xrer'], rev=1)
        jdb.add_xsens_lists(raw['xref'])
        jdb.mark_seq_xrefs(dbh, raw['xref'])
    return entries