def test_comment(self):
    lexer = Lexer()
    tokens = lexer.tokenize(
        '<!-- In the interest of restricting article length, please limit this section to '
        'two or three short paragraphs and add any substantial information to the main Issues '
        'in anarchism article. Thank you. -->')
    logging.info(tokens)
    self.assertGreater(len(tokens), 0)
def test_tokenize(self, name='wikitext'):
    """Test the tokenizer on a full wikitext document."""
    with (DATA_FOLDER / name).open(encoding="utf8") as f:
        text = f.read()
    lexer = Lexer()
    tokens = lexer.tokenize(text)
    logging.info(tokens)
    logging.info('TEXT_LENGTH: {0}'.format(len(text)))
    self.assertGreater(len(tokens), 0)
def get_prop(self, text):
    """Scan ``text`` for the first propositional formula and parse it.

    Returns a (parsed_formula, end_position) pair, or (None, 0) when no
    formula is found.
    """
    in_prop = False
    prop = ''
    for pos, ch in enumerate(text):
        if ch.isalpha():
            in_prop = True
        if in_prop and ch.isspace():
            # Whitespace terminates the formula; parse what was collected.
            return Parser(Lexer(prop)).parse(), pos
        if in_prop:
            prop += ch
    if in_prop:
        # str.strip returns a new string, so the result must be reassigned.
        prop = prop.strip('\n')
        return Parser(Lexer(prop)).parse(), len(text)
    return None, 0
class LexerTest(unittest.TestCase):
    def setUp(self):
        self.lexer = Lexer()

    def test_lex(self):
        code = "(map + 'hello' 'world' (id 7))"
        actual = [
            (Lexer.LIST_START, "("),
            (Lexer.SYMBOL, "map"),
            (Lexer.SYMBOL, "+"),
            (Lexer.SYMBOL, "'hello'"),
            (Lexer.SYMBOL, "'world'"),
            (Lexer.LIST_START, "("),
            (Lexer.SYMBOL, "id"),
            (Lexer.SYMBOL, "7"),
            (Lexer.LIST_END, ")"),
            (Lexer.LIST_END, ")"),
        ]
        self.assertEqual(actual, list(self.lexer.lex(code)))

    def test_simple_strings(self):
        code = r'"hello"'
        self.assertEqual([(Lexer.STR, "hello")], list(Lexer().lex(code)))
        code = r'"hello \\ \t \n world"'
        self.assertEqual([(Lexer.STR, "hello \\ \t \n world")], list(Lexer().lex(code)))
        with self.assertRaises(LexerError):
            list(Lexer().lex('"hello'))

    def test_triple_strings(self):
        code = r'"""hello"""'
        self.assertEqual([(Lexer.STR, "hello")], list(Lexer().lex(code)))
        code = r'"""hello \\ \t \n world"""'
        self.assertEqual([(Lexer.STR, "hello \\ \t \n world")], list(Lexer().lex(code)))
def parse_expr(input: str) -> Expr:
    from parsing.lexer import get_all_tokens, Lexer
    from parsing.parser import Parser

    lexer = Lexer(input)
    tokens = get_all_tokens(lexer)
    parser = Parser(tokens)
    return parser.expression()
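# A hedged usage sketch of parse_expr. The concrete expression grammar
# accepted by parsing.parser.Parser is not shown here, so the input
# string below is illustrative only, not a confirmed example of the
# supported syntax.
def _demo_parse_expr():
    tree = parse_expr('1 + 2 * 3')  # assumes ordinary infix arithmetic
    print(tree)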
def parse(string):
    """Parse a string and return an abstract syntax tree (AST) of the
    formula written in the string, as long as the formula is written in
    an appropriate format.

    Allowed symbols:
      - words of letters, numbers, and _ for propositional variables;
      - ! && || -> for propositional connectives;
      - X G F U W R for LTL operators.
    """
    return Parser(Lexer(string)).parse()
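# A minimal usage sketch of parse(), using only the symbols documented
# in its docstring. Operator precedence and the printed form of the AST
# depend on this project's Parser, so treat the formulas as illustrative.
def _demo_parse_ltl():
    ast = parse('req -> F grant')  # "a request implies grant eventually"
    print(ast)
    ast = parse('p U q')           # "p holds until q holds"
    print(ast)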
class TestParser(unittest.TestCase):
    lexer = Lexer()
    grammar = Grammar()

    def test_parse(self, name='wikitext'):
        with (DATA_FOLDER / name).open(encoding="utf8") as f:
            text = f.read()
        t0 = time.time()
        parser = Parser()
        ast = parser.parse(text)
        t1 = time.time()
        print('AST built in:', t1 - t0)
        return ast

    def test_template(self):
        parser = Parser()
        ast = parser.parse('{{asd}}', Grammar.template)
        print(ast)
        return ast  # TODO: assert on the AST instead of printing it

    def test_link(self):
        txt = '[[File:Nearest_stars_rotating_red-green.gif|alt=Rotating 3D image of the nearest stars|thumb|Animated 3D map of the nearest stars, centered on the Sun. {{3d glasses|color=red green}}]]'
        txt2 = '[[File:William Shea.jpg|thumb|upright|[[William Shea]] was instrumental in returning [[National League|National League baseball| [[asd|{{asd}}]]]] to [[New York City]] after five years of absence.]]'
        txt3 = '[[asd]]'
        parser = Parser()
        ast = parser.parse(txt2, Grammar.link)
        print(ast)
        return ast

    def test_headings(self):
        txt = '==asd=='
        txt3 = '===asd==='
        txt4 = '====asd===='
        txt5 = '=====asd====='
        txt6 = '======asd======'
        parser = Parser()
        ast = parser.parse(txt, expression=Grammar.headings)
        print(ast)
        return ast

    def test_compile(self, file='wikitext'):
        with (DATA_FOLDER / file).open(encoding="utf8") as f:
            text = f.read()
        result = Compiler().render(self.test_parse(name=file))
        print(result)
        print('---STATS---')
        print('Wikimedia length', len(text))
        print('Wikoogle length', len(result))
        print('Compiled size relative to original:', '{:.1%}'.format(len(result) / len(text)))

    def test_comment(self):
        txt = '<!-- In the interest of restricting article length, please limit this section to two or three short ' \
              'paragraphs and add any substantial information to the main Issues in anarchism article. Thank you. ' \
              '--> '
        parser = Parser()
        ast = parser.parse(txt, Grammar.comment)
        print(ast, Compiler().render(ast))

    def test_listener(self):
        def listener(node):
            if node.value.text.startswith('Category'):
                print(node.value.text)

        compiler = Compiler()
        compiler.on(listener, ParseTypes.LINK)
        result = compiler.render(self.test_parse())
def tokenize(input: str) -> List[Token]:
    lexer = Lexer(input)
    return get_all_tokens(lexer)
def test_redirect(self):
    lexer = Lexer()
    text = """#REDIRECT [[Ancient Greece]]{{Rcat shell|{{R move}}{{R related}}{{R unprintworthy}}}}"""
    self.assertRaises(RedirectFound, lexer.tokenize, text)
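# A hedged sketch of how a caller might react to RedirectFound, based on
# the behaviour exercised above (Lexer.tokenize raises it for redirect
# pages). tokenize_or_none is a hypothetical helper, and the exception's
# payload is not shown here, so this only demonstrates the control flow.
def tokenize_or_none(text):
    lexer = Lexer()
    try:
        return lexer.tokenize(text)
    except RedirectFound:
        return None  # caller follows or skips the redirect as needed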