Example 1
def _check(_, in_str, begin='INITIAL'):
    # Run the test case's lexer over 'in_str' (starting in lexer
    # state 'begin') and return two parallel lists: the token types
    # and the token values, in the order they were produced.
    jellex.lexreset(_.lexer, in_str, begin)
    tokens, values = [], []
    tok = _.lexer.token()
    while tok:
        tokens.append(tok.type)
        values.append(tok.value)
        tok = _.lexer.token()
    return tokens, values
Example 2
def parse(krstext):
    # Parse JEL text 'krstext' into an entry object.
    # Returns a 2-tuple (entr, errs): on success 'entr' is the parsed
    # entry and 'errs' is empty; on a parse failure 'entr' is None and
    # 'errs' contains one (message, location) tuple.
    lexer, tokens = jellex.create_lexer()
    parser = jelparse.create_parser(lexer, tokens)
    jellex.lexreset(lexer, krstext)
    errs = []
    try:
        entr = parser.parse(krstext, lexer=lexer, tracking=True)
    except jelparse.ParseError as e:
        entr = None
        errs.append((e.args[0], e.loc))
    return entr, errs
Example 3
def roundtrip(cur, intxt):
    # Parse JEL text 'intxt' with the module-level Lexer/Parser,
    # resolve and augment the entry's cross-references via the
    # database cursor 'cur', then format the entry back to JEL text.
    jellex.lexreset(Lexer, intxt)
    entr = Parser.parse(intxt, lexer=Lexer)
    entr.src = 1
    jelparse.resolv_xrefs(cur, entr)
    # Note: each pass below must finish over every sense before the
    # next pass starts, so the three loops are kept separate.
    for sens in entr._sens:
        jdb.augment_xrefs(cur, getattr(sens, '_xref', []))
    for sens in entr._sens:
        jdb.add_xsens_lists(getattr(sens, '_xref', []))
    for sens in entr._sens:
        jdb.mark_seq_xrefs(cur, getattr(sens, '_xref', []))
    return fmtjel.entr(entr, nohdr=True)
Example 4
def roundtrip(cur, intxt):
    # Parse JEL text 'intxt', resolve/augment its xrefs via the DB
    # cursor 'cur', and return the entry reformatted as JEL text.
    #
    # Since hg-180523-6b1a12 we use '\f' to separate the kanji,
    # reading and senses sections in JEL text used as input to
    # jelparse() rather than '\n' which was previously used.  To
    # avoid changing all the test data that still uses '\n', we call
    # secsepfix() to replace the first two '\n's in the test data
    # with '\f's to make suitable for parsing.
    intxt = secsepfix(intxt)
    jellex.lexreset(Lexer, intxt)
    entr = Parser.parse(intxt, lexer=Lexer)
    entr.src = 1
    jelparse.resolv_xrefs(cur, entr)
    # Each augmentation pass must complete over all senses before
    # the next begins, hence three separate loops.
    for sens in entr._sens:
        jdb.augment_xrefs(cur, getattr(sens, '_xref', []))
    for sens in entr._sens:
        jdb.add_xsens_lists(getattr(sens, '_xref', []))
    for sens in entr._sens:
        jdb.mark_seq_xrefs(cur, getattr(sens, '_xref', []))
    return fmtjel.entr(entr, nohdr=True)
Example 5
def _interactive(cur, lexer, parser):
    # Interactive read-parse-print loop: repeatedly read JEL text
    # with _getinptext(), parse it, resolve xrefs against the DB
    # cursor 'cur', and print the formatted entry.  Stops when
    # _getinptext() returns an empty/falsy value.  Parse errors and
    # xref-resolution errors are reported and the loop continues.
    while 1:
        instr = _getinptext()
        if not instr: break
        jellex.lexreset(lexer, instr)
        try:
            result = parser.parse(instr, lexer=lexer, debug=opts.debug)
        except jelparse.ParseError as e:
            if not e.loc: msg = e.args[0]
            else: msg = "%s\n%s" % (e.args[0], e.loc)
            print(msg)
            continue
        try:
            jelparse.resolv_xrefs(cur, result)
        # Bug fix: the exception must be bound here.  Previously this
        # was a bare "except ValueError:" and the following print(e)
        # raised NameError, because Python 3 deletes the 'e' bound by
        # the earlier "except ... as e" clause when that block exits.
        except ValueError as e:
            print(e)
        s = fmtjel.entr(result)
        print(s)
Example 6
def _roundtrip(cur, lexer, parser, seq, src):
    # Helper function useful for testing.  It will read an entry
    # identified by 'seq' and 'src' from the database opened on the
    # dpapi cursor object 'cur', convert that entry to a JEL text
    # string, parse the text to get a new entry object, and convert
    # that entry object to JEL text.  The text generated from the
    # original object and from the parser-generated object are
    # returned as a 2-tuple and can be compared; they should be
    # identical.  Returns (None, None) if no matching entry exists.

    #pdb.set_trace()
    sql = "SELECT id FROM entr WHERE seq=%s AND src=%s"
    entrs = jdb.entrList(cur, sql, [seq, src])
    if not entrs: return None, None
    entr = entrs[0]
    for sens in entr._sens:
        jdb.augment_xrefs(cur, getattr(sens, '_xref', []))
    jeltxt = _get_jel_text(entr)
    jellex.lexreset(lexer, jeltxt)
    reparsed = parser.parse(jeltxt, lexer=lexer, tracking=True)
    # NOTE(review): 'resolv_xrefs' is called unqualified here while
    # other code uses 'jelparse.resolv_xrefs' -- presumably imported
    # at module level; verify against the file's imports.
    resolv_xrefs(cur, reparsed)
    jeltxt2 = _get_jel_text(reparsed)
    return jeltxt, jeltxt2
Example 7
def cherr(self, seq, exception, msg):
    # Read the JEL test-data file for sequence number 'seq' and
    # assert that parsing it raises 'exception' with message 'msg'.
    global Cur, Lexer, Parser
    filename = "data/jelparse/%s.txt" % seq
    intxt = unittest_extensions.readfile_utf8(filename)
    jellex.lexreset(Lexer, intxt)
    _assertRaisesMsg(self, exception, msg,
                     Parser.parse, intxt, lexer=Lexer)
Example 8
def test(lexer, instr):
    # Debug helper: lex 'instr' and print each token produced until
    # the lexer is exhausted.
    jellex.lexreset(lexer, instr)
    tok = lexer.token()
    while tok:
        print(tok)
        tok = lexer.token()