Example #1
 def test_simple_strings(self):
     code = r'"hello"'
     self.assertEqual([(Lexer.STR, "hello")], list(Lexer().lex(code)))
     code = r'"hello \\ \t \n world"'
     self.assertEqual([(Lexer.STR, "hello \\ \t \n world")],
                      list(Lexer().lex(code)))
     with self.assertRaises(LexerError):
         list(Lexer().lex('"hello'))
Example #2
 def test_comment(self):
     lexer = Lexer()
     tokens = lexer.tokenize(
         '<!-- In the interest of restricting article length, please limit this section to '
         'two or three short paragraphs and add any substantial information to the main Issues '
         'in anarchism article. Thank you. -->')
     logging.info(tokens)
     self.assertGreater(len(tokens), 0)
Example #3
    def test_tokenize(self, name='wikitext'):
        """

        Test tokenizer

        """
        with (DATA_FOLDER / name).open(encoding="utf8") as f:
            text = f.read()
            lexer = Lexer()
            tokens = lexer.tokenize(text)
            logging.info(tokens)
            logging.info('TEXT_LENGTH: {0}'.format(len(text)))
            self.assertGreater(len(tokens), 0)
Example #4
    def get_prop(self, text):
        """Collect characters from the first alphabetic character up to the
        next whitespace and parse them as a proposition.

        Returns (ast, end_position), or (None, 0) if no alphabetic
        character is found.
        """
        in_prop = False
        prop = ''
        for pos, x in enumerate(text):
            if x.isalpha():
                in_prop = True
            if in_prop and x.isspace():
                # Whitespace terminates the token; parse what was collected.
                return Parser(Lexer(prop)).parse(), pos
            if in_prop:
                prop += x
        if in_prop:
            # Input ended while still inside a token.
            prop = prop.strip('\n')
            return Parser(Lexer(prop)).parse(), len(text)
        return None, 0
Example #5
class LexerTest(unittest.TestCase):
    def setUp(self):
        self.lexer = Lexer()

    def test_lex(self):
        code = "(map + 'hello' 'world' (id 7))"
        expected = [
            (Lexer.LIST_START, "("),
            (Lexer.SYMBOL, "map"),
            (Lexer.SYMBOL, "+"),
            (Lexer.SYMBOL, "'hello'"),
            (Lexer.SYMBOL, "'world'"),
            (Lexer.LIST_START, "("),
            (Lexer.SYMBOL, "id"),
            (Lexer.SYMBOL, "7"),
            (Lexer.LIST_END, ")"),
            (Lexer.LIST_END, ")"),
        ]
        self.assertEqual(expected, list(self.lexer.lex(code)))

    def test_simple_strings(self):
        code = r'"hello"'
        self.assertEqual([(Lexer.STR, "hello")], list(Lexer().lex(code)))
        code = r'"hello \\ \t \n world"'
        self.assertEqual([(Lexer.STR, "hello \\ \t \n world")],
                         list(Lexer().lex(code)))
        with self.assertRaises(LexerError):
            list(Lexer().lex('"hello'))

    def test_triple_strings(self):
        code = r'"""hello"""'
        self.assertEqual([(Lexer.STR, "hello")], list(Lexer().lex(code)))
        code = r'"""hello \\ \t \n world"""'
        self.assertEqual([(Lexer.STR, "hello \\ \t \n world")],
                         list(Lexer().lex(code)))
Example #6
def parse_expr(input: str) -> Expr:
    from parsing.lexer import get_all_tokens, Lexer
    from parsing.parser import Parser

    lexer = Lexer(input)
    tokens = get_all_tokens(lexer)

    parser = Parser(tokens)
    return parser.expression()
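
A minimal usage sketch; the concrete expression syntax depends on parsing.parser.Parser, so the input string below is purely illustrative:

tree = parse_expr('1 + 2')  # hypothetical input string
print(tree)  # an Expr instance describing the parse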
Example #7
def parse(string):
    """Parse a string and return the abstract syntax tree (AST) of the
    formula it contains, provided the formula is well formed.

    Allowed symbols:
      - words made of letters, digits, and _ for propositional variables;
      - ! && || -> for propositional connectives;
      - X G F U W R for LTL operators.
    """
    return Parser(Lexer(string)).parse()
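
A quick usage sketch built from the symbol set listed in the docstring; the printed form depends on the Parser's AST representation:

ast = parse('p U q')  # "p holds until q holds"
print(ast)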
Example #8
class TestParser(unittest.TestCase):
    lexer = Lexer()
    grammar = Grammar()

    def test_parse(self, name='wikitext'):
        with (DATA_FOLDER / name).open(encoding="utf8") as f:
            text = f.read()
            t0 = time.time()
            parser = Parser()
            ast = parser.parse(text)
            t1 = time.time()
            print('AST built in:', t1 - t0)
            return ast

    def test_template(self):
        parser = Parser()
        ast = parser.parse('{{asd}}', Grammar.template)
        print(ast)
        # TODO: assert on the AST instead of returning it.
        return ast

    def test_link(self):
        txt = '[[File:Nearest_stars_rotating_red-green.gif|alt=Rotating 3D image of the nearest stars|thumb|Animated 3D map of the nearest stars, centered on the Sun. {{3d glasses|color=red green}}]]'
        txt2 = '[[File:William Shea.jpg|thumb|upright|[[William Shea]] was instrumental in returning [[National League|National League baseball| [[asd|{{asd}}]]]] to [[New York City]] after five years of absence.]]'
        txt3 = '[[asd]]'
        parser = Parser()
        ast = parser.parse(txt2, Grammar.link)

        print(ast)
        return ast

    def test_headings(self):
        txt = '==asd=='
        txt3 = '===asd==='
        txt4 = '====asd===='
        txt5 = '=====asd====='
        txt6 = '======asd======'
        parser = Parser()
        ast = parser.parse(txt, expression=Grammar.headings)
        print(ast)
        return ast

    def test_compile(self, file='wikitext'):
        with (DATA_FOLDER / file).open(encoding="utf8") as f:
            text = f.read()
            result = Compiler().render(self.test_parse(name=file))
            print(result)
            print('---STATS---')
            print('Wikimedia length', len(text))
            print('Wikoogle length', len(result))
            print('Compression ratio: {:.1%}'.format(len(result) / len(text)))

    def test_comment(self):
        txt = '<!-- In the interest of restricting article length, please limit this section to two or three short ' \
              'paragraphs and add any substantial information to the main Issues in anarchism article. Thank you. ' \
              '--> '
        parser = Parser()
        ast = parser.parse(txt, Grammar.comment)
        print(ast, Compiler().render(ast))

    def test_listener(self):
        def listener(node):
            if node.value.text.startswith('Category'):
                print(node.value.text)

        compiler = Compiler()
        compiler.on(listener, ParseTypes.LINK)
        result = compiler.render(self.test_parse())
Example #9
def tokenize(input: str) -> List[Token]:
    lexer = Lexer(input)
    return get_all_tokens(lexer)
Example #10
 def setUp(self):
     self.lexer = Lexer()
Example #11
 def test_triple_strings(self):
     code = r'"""hello"""'
     self.assertEqual([(Lexer.STR, "hello")], list(Lexer().lex(code)))
     code = r'"""hello \\ \t \n world"""'
     self.assertEqual([(Lexer.STR, "hello \\ \t \n world")],
                      list(Lexer().lex(code)))
Example #12
 def test_redirect(self):
     lexer = Lexer()
     text = """#REDIRECT [[Ancient Greece]]{{Rcat shell|{{R move}}{{R related}}{{R unprintworthy}}}}"""
     self.assertRaises(RedirectFound, lexer.tokenize, text)
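
Because tokenize reports redirect pages by raising RedirectFound, callers can treat the exception as a control-flow signal rather than a failure. A minimal handling sketch, assuming RedirectFound is importable alongside Lexer:

try:
    tokens = lexer.tokenize(text)
except RedirectFound:
    tokens = []  # skip redirect pages instead of tokenizing them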