def _check(_, in_str, begin='INITIAL'):
    # Tokenize 'in_str' with the lexer held in '_.lexer' after resetting
    # it via jellex.lexreset() with 'begin'.
    # Returns a 2-tuple of parallel lists: the 'type' attribute of each
    # token and the 'value' attribute of each token, in the order the
    # lexer produced them.
    jellex.lexreset(_.lexer, in_str, begin)
    types, vals = [], []
    tok = _.lexer.token()
    while tok:
        types.append(tok.type)
        vals.append(tok.value)
        tok = _.lexer.token()
    return types, vals
def parse(krstext):
    # Parse the text 'krstext' using a newly created lexer and parser.
    # Returns a 2-tuple (entr, errs):
    #   entr -- the value returned by parser.parse(), or None if a
    #     jelparse.ParseError was raised.
    #   errs -- a list that is empty on success, or holds one
    #     (message, location) tuple taken from the ParseError's
    #     args[0] and 'loc' attribute.
    lexer, tokens = jellex.create_lexer()
    parser = jelparse.create_parser(lexer, tokens)
    jellex.lexreset(lexer, krstext)
    errs = []
    try:
        entr = parser.parse(krstext, lexer=lexer, tracking=True)
    except jelparse.ParseError as err:
        entr = None
        errs.append((err.args[0], err.loc))
    return entr, errs
def roundtrip(cur, intxt):
    # Parse the text 'intxt' with the module-level Lexer and Parser,
    # resolve and annotate the resulting entry's xrefs using the
    # database cursor 'cur', and return the entry formatted back to
    # text by fmtjel.entr() with nohdr=True.

    def xrefs_of(sens):
        # A sense without an '_xref' attribute contributes an empty list.
        return getattr(sens, '_xref', [])

    jellex.lexreset(Lexer, intxt)
    entr = Parser.parse(intxt, lexer=Lexer)
    entr.src = 1
    jelparse.resolv_xrefs(cur, entr)
    # Three separate passes: each step is applied to every sense
    # before the next step starts.
    for sens in entr._sens:
        jdb.augment_xrefs(cur, xrefs_of(sens))
    for sens in entr._sens:
        jdb.add_xsens_lists(xrefs_of(sens))
    for sens in entr._sens:
        jdb.mark_seq_xrefs(cur, xrefs_of(sens))
    return fmtjel.entr(entr, nohdr=True)
def roundtrip(cur, intxt):
    # Parse the text 'intxt' with the module-level Lexer and Parser,
    # resolve and annotate the resulting entry's xrefs using the
    # database cursor 'cur', and return the entry formatted back to
    # text by fmtjel.entr() with nohdr=True.
    #
    # As of hg-180523-6b1a12 the kanji, reading and senses sections of
    # JEL text given to jelparse() are separated by '\f' instead of the
    # '\n' used earlier.  The test data still uses '\n', so rather than
    # rewriting it all, secsepfix() is applied first to turn the first
    # two '\n's into '\f's and make the text parsable.

    def xrefs_of(sens):
        # A sense without an '_xref' attribute contributes an empty list.
        return getattr(sens, '_xref', [])

    intxt = secsepfix(intxt)
    jellex.lexreset(Lexer, intxt)
    entr = Parser.parse(intxt, lexer=Lexer)
    entr.src = 1
    jelparse.resolv_xrefs(cur, entr)
    # Three separate passes: each step is applied to every sense
    # before the next step starts.
    for sens in entr._sens:
        jdb.augment_xrefs(cur, xrefs_of(sens))
    for sens in entr._sens:
        jdb.add_xsens_lists(xrefs_of(sens))
    for sens in entr._sens:
        jdb.mark_seq_xrefs(cur, xrefs_of(sens))
    return fmtjel.entr(entr, nohdr=True)
def _interactive(cur, lexer, parser):
    # Read-parse-print loop.
    #
    # Repeatedly obtains text from _getinptext(), parses it with
    # 'parser' (driven by 'lexer'), resolves the result's xrefs using
    # the database cursor 'cur', and prints the entry as formatted by
    # fmtjel.entr().  The loop ends when _getinptext() returns a false
    # value (e.g. an empty string).
    #
    # A jelparse.ParseError is reported (message, plus location when
    # one is available) and that input is skipped.  A ValueError from
    # xref resolution is reported and the entry is still printed.
    while True:
        instr = _getinptext()
        if not instr:
            break
        jellex.lexreset(lexer, instr)
        try:
            result = parser.parse(instr, lexer=lexer, debug=opts.debug)
        except jelparse.ParseError as e:
            # Include the location only when the error carries one.
            msg = "%s\n%s" % (e.args[0], e.loc) if e.loc else e.args[0]
            print(msg)
            continue
        try:
            jelparse.resolv_xrefs(cur, result)
        except ValueError as e:
            # The exception has to be bound in this clause: a name
            # bound by an earlier 'except ... as e' is deleted when
            # that clause finishes, so it is not available here.
            print(e)
        print(fmtjel.entr(result))
def _roundtrip(cur, lexer, parser, seq, src):
    # Helper function useful for testing.  It reads the entry identified
    # by 'seq' and 'src' from the database opened on the dbapi cursor
    # 'cur', converts that entry to a JEL text string, parses the text
    # to get a new entry object, and converts that entry object back
    # to JEL text.  Both texts -- the one generated from the original
    # object and the one generated from the parser-produced object --
    # are returned as a 2-tuple so they can be compared; they should be
    # identical.  If no matching entry is found, (None, None) is
    # returned.
    found = jdb.entrList(cur, "SELECT id FROM entr WHERE seq=%s AND src=%s",
                         [seq, src])
    if not found:
        return None, None
    original = found[0]
    for sens in original._sens:
        jdb.augment_xrefs(cur, getattr(sens, '_xref', []))
    jeltxt = _get_jel_text(original)
    jellex.lexreset(lexer, jeltxt)
    reparsed = parser.parse(jeltxt, lexer=lexer, tracking=True)
    resolv_xrefs(cur, reparsed)
    return jeltxt, _get_jel_text(reparsed)
def cherr(self, seq, exception, msg):
    # Read the test input file "data/jelparse/<seq>.txt" and check, via
    # _assertRaisesMsg(), that parsing it with the module-level Parser
    # and Lexer raises 'exception' with message 'msg'.
    path = "data/jelparse/%s.txt" % seq
    intxt = unittest_extensions.readfile_utf8(path)
    jellex.lexreset(Lexer, intxt)
    _assertRaisesMsg(self, exception, msg,
                     Parser.parse, intxt, lexer=Lexer)
def test(lexer, instr):
    # Reset 'lexer' to read 'instr' and print each token it produces,
    # stopping when lexer.token() returns a false value.
    jellex.lexreset(lexer, instr)
    tok = lexer.token()
    while tok:
        print(tok)
        tok = lexer.token()