Example #1
    def testEmptyString(self):
        """Tests parsing an empty string
        """
        test1 = '""'
        test2 = '\'\''
        output1 = [token for token in lex.Lexer(test1)]
        output2 = [token for token in lex.Lexer(test2)]

        for o in (output1, output2):
            ostr = str(o)
            self.assertEqual(2, len(o), ostr)
            self.assertIsInstance(o[0], tokens.StringToken, ostr)
            self.assertIs(o[1], tokens.eof, ostr)
            self.assertIsInstance(o[0].value, str, ostr)
            self.assertEqual(0, len(o[0].value), ostr)
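All of these tests drive the lexer the same way: `lex.Lexer` is iterable and yields token objects, with the stream always terminated by the `tokens.eof` singleton. A minimal sketch of that pattern, assuming the project's `lex` and `tokens` modules are importable:

    import lex
    import tokens

    # Tokenize a source string; iteration always ends with the eof singleton.
    for token in lex.Lexer('""'):
        print(repr(token))
    # Per the test above: a StringToken with an empty value, then tokens.eof.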
Example #2
    def testEscapesInString(self):
        """Tests the basic escape characters
        """
        test = '"\\a\\b\\f\\n\\r\\v\\\\"'
        expected = '''\a\b\f\n\r\v\\'''
        output = [token for token in lex.Lexer(test)][0]
        self.assertEqual(expected, output.value)
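The expected value can be cross-checked with Python's own escape decoding, since the `unicode_escape` codec understands the same backslash sequences this test exercises:

    import codecs

    raw = r'\a\b\f\n\r\v\\'  # the string body with its escapes intact
    decoded = codecs.decode(raw, 'unicode_escape')
    assert decoded == '\a\b\f\n\r\v\\'
    print([hex(ord(ch)) for ch in decoded])
    # ['0x7', '0x8', '0xc', '0xa', '0xd', '0xb', '0x5c']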
Example #3
    def testNewLineInString(self):
        """Tests what happens when a raw newline appears in the string.
        It should be parsed normally as a newline, as this grammar allows
        strings to cross line boundaries.
        """
        test = '"hello world\nhow are you?"'
        output = [token for token in lex.Lexer(test)][0]
        # The value should be the input minus its surrounding quotes.
        self.assertEqual(test[1:-1], output.value)
Example #4
    def testQuoteInString(self):
        """Tests parsing a string that contains quotation marks that are escaped
        """
        # These strings should evaluate to the same thing. One uses
        # double quotes in the string, and the other uses single quotes.
        test1 = '"\\\'\'\\"hello\\", how\\\'s everyone feeling today?"'
        test2 = "'\\'\\'\"hello\\\", how\\'s everyone feeling today?'"
        output11, output12 = [token for token in lex.Lexer(test1)]
        output21, output22 = [token for token in lex.Lexer(test2)]

        self.assertEqual(output12, output22)
        self.assertIs(output12, tokens.eof)

        # Use a triple-quoted string so nothing needs escaping.
        exp_str = '''   ''"hello", how's everyone feeling today?     '''.strip()

        self.assertEqual(output11, output21)
        self.assertEqual(output11.value, exp_str)
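The double escaping (once for Python, once for the language under test) makes these literals hard to read; printing them shows the exact text the lexer receives:

    test1 = '"\\\'\'\\"hello\\", how\\\'s everyone feeling today?"'
    test2 = "'\\'\\'\"hello\\\", how\\'s everyone feeling today?'"
    print(test1)  # "\''\"hello\", how\'s everyone feeling today?"
    print(test2)  # '\'\'"hello\", how\'s everyone feeling today?'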
Example #5
    def testEndOfStatement(self):
        """Tests detecting End Of Statement.
        """
        test = '\n\n;\n\n;'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]

        self.assertEqual(2, len(output))
        self.assertIs(tokens.eos, output[0])
        self.assertIs(tokens.eof, output[1])
Example #6
    def testIdentifierPrecededProceededBySpace(self):
        """Tests parsing a single identifier surrounded by spaces.
        """
        test = '  lorem_ip$um_dolor_$it_amet '
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]
        self.assertEqual(2, len(output))
        self.assertIsInstance(output[0], tokens.IdentifierToken)
        self.assertEqual('lorem_ip$um_dolor_$it_amet', output[0].value)
        self.assertIs(tokens.eof, output[1])
Example #7
    def testIdentifierThenEos(self):
        """Tests parsing an identifier and then an end-of-statement.
        """
        test = 'hello_world;'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]

        self.assertEqual(3, len(output))
        self.assertIsInstance(output[0], tokens.IdentifierToken)
        self.assertIs(output[1], tokens.eos)
        self.assertIs(output[2], tokens.eof)
Example #8
    def testBasicString(self):
        """Tests parsing a string that contains some characters or something.
        Also ensures both quotation types work.
        """

        # Double-quote string.
        test = '"I like shorts! They are comfy, and easy to wear!"'
        output = [token for token in lex.Lexer(test)]
        self.assertEqual(2, len(output), str(output))
        self.assertIs(output[1], tokens.eof)
        self.assertEqual('I like shorts! They are comfy, and easy to wear!',
                         output[0].value)

        # Single-quote string.
        test = "'I like shorts! They are comfy, and easy to wear!'"
        output = [token for token in lex.Lexer(test)]
        self.assertEqual(2, len(output), str(output))
        self.assertIs(output[1], tokens.eof)
        self.assertEqual('I like shorts! They are comfy, and easy to wear!',
                         output[0].value)
Example #9
    def testOperators(self):
        """Tests correctly parsing all operators registered
        """
        for op, tok in tokens.OperatorToken.operators.items():
            output = [token for token in lex.Lexer(op)]

            self.assertEqual(2, len(output), op)
            self.assertIsInstance(output[0], tokens.OperatorToken, op)
            self.assertIsInstance(output[0].value, str, op)
            self.assertIs(tok, output[0], op)
            self.assertIs(tokens.eof, output[1], op)
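The registry's contents aren't shown in these examples, but judging by the token names used in Example #23, `tokens.OperatorToken.operators` presumably maps each operator lexeme to its singleton token, along these lines (a hypothetical reconstruction, not the real table):

    import tokens

    # Names taken from Example #23; the full table lives in the tokens module.
    operators = {
        '=': tokens.ass,
        '+=': tokens.plus_ass,
        '/=': tokens.divide_ass,
        '/': tokens.divide,
        '-': tokens.minus,
        '<': tokens.lt,
        '>': tokens.gt,
    }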
Example #10
    def testFloatFullValue(self):
        """Tests a float in format `9876.1234E-123`
        """
        test = '9876.1234e-567'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]

        self.assertEqual(2, len(output))
        self.assertIsInstance(output[0], tokens.RealToken)
        self.assertIsInstance(output[0].value, float)
        self.assertEqual(float(test), output[0].value)
        self.assertIs(output[1], tokens.eof)
Example #11
    def testFloatNumExpMinusValue(self):
        """Tests a float in format `1234e-123`
        """
        test = '12345e-123'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]

        self.assertEqual(2, len(output))
        self.assertIsInstance(output[0], tokens.RealToken)
        self.assertIsInstance(output[0].value, float)
        self.assertEqual(float(test), output[0].value)
        self.assertIs(output[1], tokens.eof)
Example #12
    def testFloatPointValue(self):
        """Tests a float in format `.1234`
        """
        test = '.12345'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]

        self.assertEqual(2, len(output))
        self.assertIsInstance(output[0], tokens.RealToken)
        self.assertIsInstance(output[0].value, float)
        self.assertEqual(float(test), output[0].value)
        self.assertIs(output[1], tokens.eof)
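All three float tests lean on Python's `float()` accepting the same literal shapes. Note that the first test's exponent is far below the double-precision range, so both sides of that assertion quietly underflow to `0.0`:

    for s in ('9876.1234e-567', '12345e-123', '.12345'):
        print(s, '->', float(s))
    # 9876.1234e-567 -> 0.0  (underflow: doubles bottom out near 5e-324)
    # 12345e-123 -> 1.2345e-119
    # .12345 -> 0.12345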
Example #13
    def testBase16XInteger(self):
        """Tests parsing a base 16 integer
        """
        test = '0X1234FAABAAD99453'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]

        self.assertEqual(2, len(output))
        self.assertIsInstance(output[0], tokens.IntToken)
        self.assertIsInstance(output[0].value, int)
        self.assertEqual(int(test, 16), output[0].value)
        self.assertIs(output[1], tokens.eof)
Example #14
    def testBase8OInteger(self):
        """Tests parsing a base 8 integer
        """
        test = '0O70062233'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]

        self.assertEqual(2, len(output))
        self.assertIsInstance(output[0], tokens.IntToken)
        self.assertIsInstance(output[0].value, int)
        self.assertEqual(int(test, 8), output[0].value)
        self.assertIs(output[1], tokens.eof)
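Both integer tests rely on a handy detail of Python's `int()`: given an explicit base, it tolerates the matching `0x`/`0o` prefix in either case, so the raw source text converts directly:

    for literal, base in (('0X1234FAABAAD99453', 16), ('0O70062233', 8)):
        print(literal, '->', int(literal, base))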
Example #15
    def testTwoIdentifiersSeparatedBySpace(self):
        """Tests two identifiers separated by a space.
        """
        test = 'hel$0 _world'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]
        self.assertEqual(3, len(output), 'Expected three tokens.')
        self.assertIsInstance(output[0], tokens.IdentifierToken,
                              'Expected first token to be identifier')
        self.assertEqual('hel$0', output[0].value)
        self.assertIsInstance(output[1], tokens.IdentifierToken,
                              'Expected second token to be identifier')
        self.assertEqual('_world', output[1].value)
        self.assertIs(output[2], tokens.eof)
Example #16
    def testIdentifierStartsWithUnderscore(self):
        """Ensures we detect a single identifier when the string starts with an
        underscore.
        """
        test = '_hello_world'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]
        self.assertEqual(2, len(output), f'Did not get two tokens. {output}')
        # First should be identifier token and read '_hello_world'
        self.assertIsInstance(output[0], tokens.IdentifierToken,
                              'Expected identifier token.')
        self.assertEqual(output[0].value, '_hello_world')
        # Last should be EOF
        self.assertIs(output[1], tokens.eof)
Example #17
    def testValidWhitespace(self):
        """Ensures we skip over all whitespace."""
        test = '\t \t \t \t'

        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]

        # We should have only one token
        self.assertEqual(1, len(output), 'Did not get exactly 1 token.')
        # This token should be EOF
        self.assertIs(tokens.eof, output[0], 'Did not get EOF')
        # Furthermore, the index should be at EOF.
        self.assertEqual(len(test), lex_obj.index,
                         'Did not leave index at EOF')
Example #18
    def testUtfLiteral(self):
        """Tests some valid UTF-8 literals.
        """
        expected_cs = ['@', '?', '\N{ANGRY FACE}', '\N{OK HAND SIGN}', '\f']

        for c in expected_cs:
            # Gets the hexadecimal part, appends it onto '\u'
            hex_value = '\\u' + hex(ord(c))[2:]

            code = f'"The character is: {hex_value}."'
            expected = f'The character is: {c}.'

            output = [token for token in lex.Lexer(code)][0]
            self.assertEqual(expected, output.value, c)
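Note what `hex(ord(c))[2:]` actually generates: as many hex digits as the code point needs (five for U+1F620), with no zero padding. The lexer's `\u` escape therefore appears to accept variable-length hex runs, unlike Python's fixed four digits:

    for c in ('@', '\N{ANGRY FACE}', '\f'):
        print(repr(c), '->', '\\u' + hex(ord(c))[2:])
    # '@' -> \u40
    # '😠' -> \u1f620
    # '\x0c' -> \uc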
Example #19
    def testIdentifierActuallyReserveWord(self):
        """Ensures we parse reserve words correctly.
        """
        for rw in ('for', 'while', 'if', 'elif', 'else', 'continue', 'break',
                   'in', 'return'):
            output = [token for token in lex.Lexer(rw)]

            self.assertEqual(2, len(output), rw)
            self.assertIsInstance(output[0], tokens.ReserveWordToken, rw)
            self.assertEqual(rw, output[0].name, rw)
            self.assertEqual(rw, output[0].value, rw)
            self.assertIs(tokens.eof, output[1], rw)

        for sp_n, sp_v in {
                'INF': math.inf,
                'NaN': math.nan,
                'true': True,
                'false': False,
                'null': None
        }.items():
            output = [token for token in lex.Lexer(sp_n)]

            # Edge case: IEEE 754 specifies that NaN != NaN, so a plain
            # equality assertion can never pass for NaN. INF and NaN are
            # therefore checked via math.isinf/math.isnan below.
            self.assertEqual(2, len(output), str(output))
            self.assertIsInstance(output[0], tokens.ReserveWordToken, sp_n)

            self.assertEqual(sp_n, output[0].name, sp_n)
            if sp_n.lower() == 'inf':
                self.assertTrue(math.isinf(output[0].value), 'Not INF.')
            elif sp_n.lower() == 'nan':
                self.assertTrue(math.isnan(output[0].value), 'Not NaN')
            else:
                self.assertEqual(sp_v, output[0].value, sp_n)

            self.assertIs(tokens.eof, output[1], sp_n)
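The special-casing for INF and NaN exists because of IEEE 754 semantics: NaN compares unequal to everything, including itself, so a plain assertEqual can never pass:

    import math

    print(math.nan == math.nan)  # False: NaN is unequal even to itself
    print(math.isnan(math.nan))  # True:  the only reliable NaN check
    print(math.inf == math.inf)  # True:  infinities compare normally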
Example #20
    def testEof(self):
        """Ensures we detect EOF."""
        test = ''

        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]
        # We should have only one token
        self.assertEqual(1, len(output), 'Did not get exactly 1 token.')
        # This token should be EOF
        self.assertIs(tokens.eof, output[0], 'Did not get EOF')
        self.assertIsInstance(tokens.eof, tokens.MiscToken,
                              'EOF must be MiscToken type.')
        # Furthermore, the index should be at EOF.
        self.assertEqual(len(test), lex_obj.index,
                         'Did not leave index at EOF')
Example #21
    def testIdentifierWholeString(self):
        """Tests specifically all characters allowed in an identifier.
        """
        test = \
            '_1234567890$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
        lex_obj = lex.Lexer(test)
        output = [token for token in lex_obj]
        # Two tokens
        self.assertEqual(2, len(output), 'Did not get two tokens.')
        # First is identifier
        self.assertIsInstance(output[0], tokens.IdentifierToken,
                              'Expected identifier token.')
        # Second is eof
        self.assertIs(output[1], tokens.eof, 'Expected EOF.')
        # First matches test input
        self.assertEqual(test, output[0].value)
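Taken together with Examples #6, #15, and #16, these tests imply an identifier grammar along the lines of `[_$A-Za-z][_$A-Za-z0-9]*`. This is an inference from the test inputs, not a quote from the lexer; whether `$` may begin an identifier, for instance, is never exercised:

    import re

    IDENT = re.compile(r'[_$A-Za-z][_$A-Za-z0-9]*')  # inferred pattern

    for s in ('lorem_ip$um_dolor_$it_amet', '_hello_world', 'hel$0'):
        assert IDENT.fullmatch(s), s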
Example #22
    def testUtfNTag(self):
        """Tests out the python-style \\N{ABCDEFG} tag syntax (without the
        first backslash, of course).
        """
        tests = {
            '"\\N{ANGRY FACE}"': '\N{ANGRY FACE}',
            '"\\N{HEAVY BLACK HEART}"': '\N{HEAVY BLACK HEART}',
            '"\\N{SPACE}"': ' ',
            '"\\N{EURO SIGN}"': '€',
            '"\\N{superscript three}\\N{superscript one}"': '³¹',
            '"\\N{LEFT CURLY BRACKET}"': '{',
            '"\\N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}"': '«',
            '"\\N{RIGHTWARDS ARROW}"': '→',
        }

        for input_expr, output_expr in tests.items():
            output = [token for token in lex.Lexer(input_expr)][0]
            self.assertEqual(output_expr, output.value, output.value)
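The expected values come straight from the Unicode name table; `unicodedata.lookup` resolves the same names the `\N{...}` escape uses, which is an easy way to verify an entry before adding it to the table above:

    import unicodedata

    for name in ('ANGRY FACE', 'EURO SIGN', 'RIGHTWARDS ARROW'):
        print(name, '->', unicodedata.lookup(name))
    # ANGRY FACE -> 😠
    # EURO SIGN -> €
    # RIGHTWARDS ARROW -> →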
Example #23
    def testExampleCode(self):
        """Tests some code that might be remotely similar to what we hope to
        parse once the system is complete.
        """

        code = '''\
            find_math_stuff = (list) {
                mean = .0e1;
                median = list[len(list)/2];
                max_val = -INF;
                min_val = INF;
                
                for el in list {
                    mean += el;
                    if el < min_val {
                        min_val = el;
                    };
                    if el > max_val {
                        max_val = el;
                    };
                };
                mean /= len(list);
                
                print(mean, median, max_val, min_val);
            };
            
            find_math_stuff(1,2,3,4,5,6,7,8,9,10);
        '''

        output = tuple(token for token in lex.Lexer(code))
        expected_output = (
            # find_math_stuff = (list) {
            tokens.IdentifierToken('find_math_stuff'),
            tokens.ass,
            tokens.lparen,
            tokens.IdentifierToken('list'),
            tokens.rparen,
            tokens.lbrace,
            # ....mean = .0e1;
            tokens.IdentifierToken('mean'),
            tokens.ass,
            tokens.RealToken(0.0),
            tokens.eos,
            # ....median = list[len(list)/2];
            tokens.IdentifierToken('median'),
            tokens.ass,
            tokens.IdentifierToken('list'),
            tokens.lsquare,
            tokens.IdentifierToken('len'),
            tokens.lparen,
            tokens.IdentifierToken('list'),
            tokens.rparen,
            tokens.divide,
            tokens.IntToken(2),
            tokens.rsquare,
            tokens.eos,
            # ....max_val = -INF;
            tokens.IdentifierToken('max_val'),
            tokens.ass,
            tokens.minus,
            tokens.inf_rw,
            tokens.eos,
            # ....min_val = INF;
            tokens.IdentifierToken('min_val'),
            tokens.ass,
            tokens.inf_rw,
            tokens.eos,
            # ....for el in list {
            tokens.for_rw,
            tokens.IdentifierToken('el'),
            tokens.in_rw,
            tokens.IdentifierToken('list'),
            tokens.lbrace,
            # ........mean += el;
            tokens.IdentifierToken('mean'),
            tokens.plus_ass,
            tokens.IdentifierToken('el'),
            tokens.eos,
            # ........if el < min_val {
            tokens.if_rw,
            tokens.IdentifierToken('el'),
            tokens.lt,
            tokens.IdentifierToken('min_val'),
            tokens.lbrace,
            # ............min_val = el;
            tokens.IdentifierToken('min_val'),
            tokens.ass,
            tokens.IdentifierToken('el'),
            tokens.eos,
            # ........}
            tokens.rbrace,
            tokens.eos,
            # ........if el > max_val {
            tokens.if_rw,
            tokens.IdentifierToken('el'),
            tokens.gt,
            tokens.IdentifierToken('max_val'),
            tokens.lbrace,
            # ............max_val = el;
            tokens.IdentifierToken('max_val'),
            tokens.ass,
            tokens.IdentifierToken('el'),
            tokens.eos,
            # ........}
            tokens.rbrace,
            tokens.eos,
            # ....}
            tokens.rbrace,
            tokens.eos,
            # ....mean /= len(list);
            tokens.IdentifierToken('mean'),
            tokens.divide_ass,
            tokens.IdentifierToken('len'),
            tokens.lparen,
            tokens.IdentifierToken('list'),
            tokens.rparen,
            tokens.eos,
            # ....print(mean, median, max_val, min_val);
            tokens.IdentifierToken('print'),
            tokens.lparen,
            tokens.IdentifierToken('mean'),
            tokens.comma,
            tokens.IdentifierToken('median'),
            tokens.comma,
            tokens.IdentifierToken('max_val'),
            tokens.comma,
            tokens.IdentifierToken('min_val'),
            tokens.rparen,
            tokens.eos,
            # }
            tokens.rbrace,
            tokens.eos,
            # find_math_stuff(1,2,3,4,5,6,7,8,9,10);
            tokens.IdentifierToken('find_math_stuff'),
            tokens.lparen,
            tokens.IntToken(1),
            tokens.comma,
            tokens.IntToken(2),
            tokens.comma,
            tokens.IntToken(3),
            tokens.comma,
            tokens.IntToken(4),
            tokens.comma,
            tokens.IntToken(5),
            tokens.comma,
            tokens.IntToken(6),
            tokens.comma,
            tokens.IntToken(7),
            tokens.comma,
            tokens.IntToken(8),
            tokens.comma,
            tokens.IntToken(9),
            tokens.comma,
            tokens.IntToken(10),
            tokens.rparen,
            tokens.eos,
            tokens.eof)

        # Debug aid: dump the token stream so failures are easy to inspect.
        print(*[repr(token) for token in output], sep='\n')
        self.assertEqual(len(expected_output), len(output))

        for i, (expected, actual) in enumerate(zip(expected_output, output)):
            self.assertEqual(expected, actual, f'Token {i}')