Example #1
0
class TestCLexerErrors(unittest.TestCase):
    """ Exercise lexing of malformed inputs.

        The error callback handed to CLexer records the reported
        message on the test instance, where each case can examine it.
    """
    def error_func(self, msg, line, column):
        self.error = msg

    def type_lookup_func(self, typ):
        return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)
        self.error = ""

    def assertLexerError(self, str, error_like):
        # Feed the input and drain every token; any lexing problem is
        # written into self.error through the error_func callback.
        self.clex.input(str)
        token_types(self.clex)

        # The captured message must match the expected pattern.
        failure_msg = "\nExpected error matching: %s\nGot: %s" % (
            error_like, self.error)
        self.assertTrue(re.search(error_like, self.error), failure_msg)

        # Wipe the last error so subsequent invocations start clean.
        self.error = ""

    def test_trivial_tokens(self):
        for bad_input in ('@', '`', '\\'):
            self.assertLexerError(bad_input, ERR_ILLEGAL_CHAR)

    def test_integer_constants(self):
        for bad_input in ('029', '012345678'):
            self.assertLexerError(bad_input, ERR_OCTAL)

    def test_char_constants(self):
        for bad_input in ("'", "'b\n"):
            self.assertLexerError(bad_input, ERR_UNMATCHED_QUOTE)

        for bad_input in ("'jx'", "'\*'"):
            self.assertLexerError(bad_input, ERR_INVALID_CCONST)

    def test_string_literals(self):
        for bad_input in ('"jx\9"', '"hekllo\* on ix"', 'L"hekllo\* on ix"'):
            self.assertLexerError(bad_input, ERR_STRING_ESCAPE)

    def test_preprocessor(self):
        self.assertLexerError('#line "ka"', ERR_FILENAME_BEFORE_LINE)
        self.assertLexerError('#line df', ERR_INVALID_LINE_DIRECTIVE)
        self.assertLexerError('#line \n', ERR_LINENUM_MISSING)
class TestCLexerErrors(unittest.TestCase):
    """ Lexing of strings that must trigger lexer errors.

        An error function stores the reported message into self.error,
        which the assertion helper then matches against a regex pattern.
    """
    def error_func(self, msg, line, column):
        self.error = msg

    def type_lookup_func(self, typ):
        return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)
        self.error = ""

    def assertLexerError(self, str, error_like):
        # Push the string through the lexer. As tokens are pulled out,
        # errors are captured into self.error by error_func.
        self.clex.input(str)
        token_types(self.clex)

        matched = re.search(error_like, self.error)
        self.assertTrue(matched,
                        "\nExpected error matching: %s\nGot: %s" % (
                            error_like, self.error))

        # Forget this error before the next invocation runs.
        self.error = ""

    def test_trivial_tokens(self):
        for bad_char in ('@', '$', '`', '\\'):
            self.assertLexerError(bad_char, ERR_ILLEGAL_CHAR)

    def test_integer_constants(self):
        for bad_num in ('029', '012345678'):
            self.assertLexerError(bad_num, ERR_OCTAL)

    def test_char_constants(self):
        for unmatched in ("'", "'b\n"):
            self.assertLexerError(unmatched, ERR_UNMATCHED_QUOTE)

        for invalid in ("'jx'", "'\*'"):
            self.assertLexerError(invalid, ERR_INVALID_CCONST)

    def test_string_literals(self):
        for bad_str in ('"jx\9"', '"hekllo\* on ix"', 'L"hekllo\* on ix"'):
            self.assertLexerError(bad_str, ERR_STRING_ESCAPE)

    def test_preprocessor(self):
        self.assertLexerError('#line "ka"', ERR_FILENAME_BEFORE_LINE)
        self.assertLexerError('#line df', ERR_INVALID_LINE_DIRECTIVE)
        self.assertLexerError('#line \n', ERR_LINENUM_MISSING)
Example #3
0
 def test_on_rbrace_lbrace(self):
     # Collect each brace callback in the order the lexer fires it.
     seen = []
     lexer = CLexer(self.error_func,
                    lambda: seen.append('{'),
                    lambda: seen.append('}'),
                    self.type_lookup_func)
     lexer.build(optimize=False)
     lexer.input('hello { there } } and again }}{')
     token_list(lexer)
     self.assertEqual(seen, ['{', '}', '}', '}', '}', '{'])
Example #4
0
class LexerWrapper:
    """Thin convenience facade over pycparser's CLexer.

    All required CLexer callbacks are satisfied with no-op static
    methods; the wrapper only exposes token iteration.
    """

    @staticmethod
    def _error_func(msg, loc0, loc1):
        pass

    @staticmethod
    def _brace_func():
        pass

    @staticmethod
    def _type_lookup_func(typ):
        return False

    def __init__(self) -> None:
        self.lexer = CLexer(self._error_func, self._brace_func,
                            self._brace_func, self._type_lookup_func)
        self.lexer.build(optimize=True, lextab='pycparser.lextab')

    def lex_tokens(self, code: str) -> Iterator[LexToken]:
        # Restart line counting for every fresh piece of code.
        self.lexer.reset_lineno()
        self.lexer.input(code)
        # iter(callable, sentinel) pulls tokens until None appears.
        yield from iter(self.lexer.token, None)

    def lex(self, code: str) -> List[str]:
        return [tok.value for tok in self.lex_tokens(code)]
Example #5
0
class Lexer:
    """Wrapper around pycparser's CLexer exposing token-stream helpers."""

    def __init__(self):
        # All four CLexer callback slots (error, lbrace, rbrace, type
        # lookup) are filled with the same no-op static method.
        self.lexer = CLexer(self._callback, self._callback, self._callback,
                            self._callback)
        self.lexer.build(optimize=True,
                         lextab='pycparser.lextab',
                         outputdir='')

    def _type_lookup(self, *args):
        # Unused placeholder; _callback is what is wired into CLexer above.
        pass

    @staticmethod
    def _callback(*args):
        pass

    def lex(self, text):
        # Feed the text and return the underlying ply lexer, which is
        # iterable over the produced tokens.
        self.lexer.input(text)
        return self.lexer.lexer

    def keywords(self):
        # Token-type names known to the lexer.
        return self.lexer.tokens

    def lex_num(self, text):
        # Generator yielding one integer code per character position:
        # 0 for gap characters not covered by a token, 1 then repeated 2s
        # for the extra positions of multi-character tokens, and finally
        # the token-type's index within KEYWORDS[0] for each token.
        # NOTE(review): this encoding is inferred from the loop structure
        # alone -- confirm against the definition of KEYWORDS.
        lexed = self.lex(text)
        pos = 0
        for a in lexed:
            # Emit 0 for every character before this token begins.
            while a.lexpos > pos:
                yield 0
                pos += 1
            alen = len(a.value)
            pos += alen
            if alen > 1:
                yield 1
                while alen > 2:
                    alen -= 1
                    yield 2
            yield KEYWORDS[0].index(a.type)
Example #6
0
 def __init__(self, source, filename):
     # Read the whole stream from the beginning and strip comments first.
     # NOTE(review): the `filename` parameter is not used in this body.
     source.seek(0)
     self._code = source.read()
     self._code = remove_comments(self._code)
     # NOTE(review): self.on_lbrace_func is passed for BOTH the lbrace and
     # rbrace callback slots; other CLexer call sites in this file pass a
     # distinct rbrace callback here -- confirm this is intentional.
     clex = CLexer(self.error_func, self.on_lbrace_func, self.on_lbrace_func, self.type_lookup_func)
     clex.build(optimize=False)
     clex.input(self._code)
     # Pull tokens until the lexer returns None.
     tokens = list(iter(clex.token, None))
     self._parser_tokens = self.__init_tokens(tokens)
     self._parsed_code = self.__get_parsed_code(self._parser_tokens)
 def test_on_rbrace_lbrace(self):
     # The lexer must fire the brace callbacks once per brace, in order.
     events = []

     def record_open():
         events.append('{')

     def record_close():
         events.append('}')

     lexer = CLexer(self.error_func, record_open, record_close,
                    self.type_lookup_func)
     lexer.build(optimize=False)
     lexer.input('hello { there } } and again }}{')
     token_list(lexer)
     self.assertEqual(events, ['{', '}', '}', '}', '}', '{'])
Example #8
0
    def tokenize_with_offset(self, code: str) \
            -> Optional[List[Tuple[int, Token[str, str]]]]:
        """Lex *code* and pair each token with its character offset."""
        # Drop carriage returns so offsets are not skewed by '\r'.
        normalized = code.replace("\r", "")

        lexer = CLexer(logger.warning, lambda: None, lambda: None,
                       lambda x: False)
        lexer.build(optimize=False)
        lexer.input(normalized)

        result: List[Tuple[int, Token[str, str]]] = []
        while True:
            tok = lexer.token()
            if tok is None:
                break
            result.append((tok.lexpos, Token(tok.type, tok.value, tok.value)))
        return result
Example #9
0
    def lex(self, text):
        """Tokenize *text* with pycparser's lexer.

        Returns a list of (value, type, lineno, filename, lexpos)
        tuples, one per token.
        """
        # Open a fresh scope list for this lexing pass.
        self._scope_stack = [dict()]

        # Construct and initialise a lexer on top of the pycparser
        # implementation, wiring in this instance's callbacks.
        lex = CLexer(self._lex_error_func, self._lex_on_lbrace_func,
                     self._lex_on_rbrace_func, self._lex_type_lookup_func)
        lex.build()
        lex.input(text)

        # Drain tokens until the lexer yields None.
        list_of_tokens = []
        for tok in iter(lex.token, None):
            list_of_tokens.append(
                (tok.value, tok.type, tok.lineno, lex.filename, tok.lexpos))
        return list_of_tokens
Example #10
0
class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. Therefore, the error_func passed to the lexer
        raises an exception.
    """
    def error_func(self, msg, line, column):
        # Any lexer error is an outright test failure.
        self.fail(msg)

    def on_lbrace_func(self):
        pass

    def on_rbrace_func(self):
        pass

    def type_lookup_func(self, typ):
        # Identifiers starting with 'mytype' are treated as typedef names.
        if typ.startswith('mytype'):
            return True
        else:
            return False

    def setUp(self):
        self.clex = CLexer(self.error_func, lambda: None, lambda: None,
                           self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, str, types):
        # Lex the whole input and compare the full token-type sequence.
        self.clex.input(str)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('$dollar cent$', ['ID', 'ID'])
        self.assertTokensTypes('i ^= 1;', ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('12l', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872lU', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872LL', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872ull', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872llu', ['INT_CONST_DEC'])
        self.assertTokensTypes('1009843200000uLL', ['INT_CONST_DEC'])
        self.assertTokensTypes('1009843200000LLu', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0b110', ['INT_CONST_BIN'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_special_names(self):
        self.assertTokensTypes('sizeof offsetof', ['SIZEOF', 'OFFSETOF'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes('0x0666e+3', ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

    def test_hexadecimal_floating_constants(self):
        self.assertTokensTypes('0xDE.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0x.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0X12.P0', ['HEX_FLOAT_CONST'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

    def test_on_rbrace_lbrace(self):
        # The brace callbacks must fire once per brace, in input order.
        braces = []
        def on_lbrace():
            braces.append('{')
        def on_rbrace():
            braces.append('}')
        clex = CLexer(self.error_func, on_lbrace, on_rbrace,
                      self.type_lookup_func)
        clex.build(optimize=False)
        clex.input('hello { there } } and again }}{')
        token_list(clex)
        self.assertEqual(braces, ['{', '}', '}', '}', '}', '{'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        self.assertTokensTypes(
            '"i am a string too \t"',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"esc\ape \"\'\? \0234 chars \rule"''',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"hello 'joe' wanna give it a \"go\"?"''',
            ['STRING_LITERAL'])

    def test_mess(self):
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET',
                'LBRACE', 'RBRACE',
            'RBRACKET',
            'LPAREN', 'RPAREN'])

        self.assertTokensTypes(
            r'()||!C&~Z?J',
            ['LPAREN', 'RPAREN',
            'LOR',
            'LNOT', 'ID',
            'AND',
            'NOT', 'ID',
            'CONDOP', 'ID'])

        self.assertTokensTypes(
            r'+-*/%|||&&&^><>=<===!=',
            ['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
            'LOR', 'OR',
            'LAND', 'AND',
            'XOR',
            'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'])

        self.assertTokensTypes(
            r'++--->?.,;:',
            ['PLUSPLUS', 'MINUSMINUS',
            'ARROW', 'CONDOP',
            'PERIOD', 'COMMA', 'SEMI', 'COLON'])

    def test_exprs(self):
        self.assertTokensTypes(
            'bb-cc',
            ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes(
            'foo & 0xFF',
            ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            ['LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID',
            'RPAREN', 'TIMES', 'INT_CONST_DEC'],)

        self.assertTokensTypes(
            'x | y >> z',
            ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x = y > 0 ? y : -6',
            ['ID', 'EQUALS',
                'ID', 'GT', 'INT_CONST_OCT',
                'CONDOP',
                'ID',
                'COLON',
                'MINUS', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'a+++b',
            ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes(
            'for (int i = 0; i < n; ++i)',
            ['FOR', 'LPAREN',
                        'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
                        'ID', 'LT', 'ID', 'SEMI',
                        'PLUSPLUS', 'ID',
                    'RPAREN'])

        self.assertTokensTypes(
            'self: goto self;',
            ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
                {
                    case TYPE_ID:
                        m = 5;
                        break;
                    default:
                        m = 8;
                }""",
            ['SWITCH', 'LPAREN', 'ID', 'RPAREN',
                'LBRACE',
                    'CASE', 'ID', 'COLON',
                        'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                        'BREAK', 'SEMI',
                    'DEFAULT', 'COLON',
                        'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                'RBRACE'])

    def test_preprocessor_line(self):
        # #line directives rewrite the lexer's notion of lineno/filename;
        # tokens are pulled one at a time and checked in sequence, so the
        # exact order of token() calls below is significant.
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

        str = r"""
        546
        #line 66 "kwas\df.h"
        id 4
        dsf
        # 9
        armo
        #line 10 "..\~..\test.h"
        tok1
        #line 99999 "include/me.h"
        tok2
        """

        #~ self.clex.filename
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        # Skip over '4', 'dsf' and the '# 9' directive's effect.
        for i in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'ID')
        self.assertEqual(t4.value, 'tok1')
        self.assertEqual(t4.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\~..\test.h')

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'ID')
        self.assertEqual(t5.value, 'tok2')
        self.assertEqual(t5.lineno, 99999)
        self.assertEqual(self.clex.filename, r'include/me.h')

    def test_preprocessor_line_funny(self):
        str = r'''
        #line 10 "..\6\joe.h"
        10
        '''
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\6\joe.h')


    def test_preprocessor_pragma(self):
        str = '''
        42
        #pragma
        #pragma helo me
        #pragma once
        # pragma omp parallel private(th_id)
        #\tpragma {pack: 2, smack: 3}
        #pragma <includeme.h> "nowit.h"
        #pragma "string"
        #pragma somestring="some_other_string"
        #pragma id 124124 and numbers 0235495
        59
        '''
        # Check that pragmas are tokenized, including trailing string
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'PPPRAGMA')

        t3 = self.clex.token()
        self.assertEqual(t3.type, 'PPPRAGMA')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'PPPRAGMASTR')
        self.assertEqual(t4.value, 'helo me')

        # Skip '#pragma once' (PPPRAGMA + PPPRAGMASTR) and the PPPRAGMA
        # of the '# pragma omp ...' line.
        for i in range(3):
            t = self.clex.token()

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'PPPRAGMASTR')
        self.assertEqual(t5.value, 'omp parallel private(th_id)')

        # The remaining five pragma lines each produce a PPPRAGMA token
        # followed by a PPPRAGMASTR payload.
        for i in range(5):
            ta = self.clex.token()
            self.assertEqual(ta.type, 'PPPRAGMA')
            tb = self.clex.token()
            self.assertEqual(tb.type, 'PPPRAGMASTR')

        t6 = self.clex.token()
        self.assertEqual(t6.type, 'INT_CONST_DEC')
        self.assertEqual(t6.lineno, 12)
Example #11
0
class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. Therefore, the error_func passed to the lexer
        raises an exception.
    """
    def error_func(self, msg, line, column):
        # Any lexer error is an outright test failure.
        self.fail(msg)

    def type_lookup_func(self, typ):
        # Identifiers starting with 'mytype' are treated as typedef names.
        if typ.startswith('mytype'):
            return True
        else:
            return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, str, types):
        # Lex the whole input and compare the full token-type sequence.
        self.clex.input(str)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('i ^= 1;', ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872LL', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872ull', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes('0x0666e+3', ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        self.assertTokensTypes(
            '"i am a string too \t"',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"esc\ape \"\'\? \0234 chars \rule"''',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"hello 'joe' wanna give it a \"go\"?"''',
            ['STRING_LITERAL'])

    def test_mess(self):
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET',
                'LBRACE', 'RBRACE',
            'RBRACKET',
            'LPAREN', 'RPAREN'])

        self.assertTokensTypes(
            r'()||!C&~Z?J',
            ['LPAREN', 'RPAREN',
            'LOR',
            'LNOT', 'ID',
            'AND',
            'NOT', 'ID',
            'CONDOP', 'ID'])

        self.assertTokensTypes(
            r'+-*/%|||&&&^><>=<===!=',
            ['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
            'LOR', 'OR',
            'LAND', 'AND',
            'XOR',
            'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'])

        self.assertTokensTypes(
            r'++--->?.,;:',
            ['PLUSPLUS', 'MINUSMINUS',
            'ARROW', 'CONDOP',
            'PERIOD', 'COMMA', 'SEMI', 'COLON'])

    def test_exprs(self):
        self.assertTokensTypes(
            'bb-cc',
            ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes(
            'foo & 0xFF',
            ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            ['LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID',
            'RPAREN', 'TIMES', 'INT_CONST_DEC'],)

        self.assertTokensTypes(
            'x | y >> z',
            ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x = y > 0 ? y : -6',
            ['ID', 'EQUALS',
                'ID', 'GT', 'INT_CONST_OCT',
                'CONDOP',
                'ID',
                'COLON',
                'MINUS', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'a+++b',
            ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes(
            'for (int i = 0; i < n; ++i)',
            ['FOR', 'LPAREN',
                        'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
                        'ID', 'LT', 'ID', 'SEMI',
                        'PLUSPLUS', 'ID',
                    'RPAREN'])

        self.assertTokensTypes(
            'self: goto self;',
            ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
                {
                    case TYPE_ID:
                        m = 5;
                        break;
                    default:
                        m = 8;
                }""",
            ['SWITCH', 'LPAREN', 'ID', 'RPAREN',
                'LBRACE',
                    'CASE', 'ID', 'COLON',
                        'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                        'BREAK', 'SEMI',
                    'DEFAULT', 'COLON',
                        'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                'RBRACE'])

    def test_preprocessor(self):
        # #line directives rewrite the lexer's notion of lineno/filename;
        # tokens are pulled one at a time and checked in sequence, so the
        # exact order of token() calls below is significant.
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

        str = r"""
        546
        #line 66 "kwas\df.h" 
        id 4
        dsf
        # 9 
        armo
        #line 10 "..\~..\test.h"
        tok1
        #line 99999 "include/me.h"
        tok2
        """

        #~ self.clex.filename
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        # Skip over '4', 'dsf' and the '# 9' directive's effect.
        for i in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'ID')
        self.assertEqual(t4.value, 'tok1')
        self.assertEqual(t4.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\~..\test.h')

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'ID')
        self.assertEqual(t5.value, 'tok2')
        self.assertEqual(t5.lineno, 99999)
        self.assertEqual(self.clex.filename, r'include/me.h')
class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. Therefore, the error_func passed to the lexer
        raises an exception.
    """
    def error_func(self, msg, line, column):
        # Any lexer error immediately fails the running test.
        self.fail(msg)

    def on_lbrace_func(self):
        # No-op brace callbacks; brace tracking is exercised separately
        # in test_on_rbrace_lbrace with its own lexer instance.
        pass

    def on_rbrace_func(self):
        pass

    def type_lookup_func(self, typ):
        # Report names starting with 'mytype' as typedef names so the
        # TYPEID-vs-ID distinction can be tested.
        if typ.startswith('mytype'):
            return True
        else:
            return False

    def setUp(self):
        # Fresh, non-optimized lexer for each test; plain lambdas serve
        # as the lbrace/rbrace callbacks here.
        self.clex = CLexer(self.error_func, lambda: None, lambda: None,
                           self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, str, types):
        # Lex the entire input and compare only the token type names.
        # NOTE: the first parameter shadows the builtin 'str'.
        self.clex.input(str)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('$dollar cent$', ['ID', 'ID'])
        self.assertTokensTypes('i ^= 1;',
                               ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('12l', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872lU', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872LL', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872ull', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872llu', ['INT_CONST_DEC'])
        self.assertTokensTypes('1009843200000uLL', ['INT_CONST_DEC'])
        self.assertTokensTypes('1009843200000LLu', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0b110', ['INT_CONST_BIN'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])
        # Multi-character character constants lex as INT_CONST_CHAR,
        # not CHAR_CONST.
        self.assertTokensTypes("'12'", ['INT_CONST_CHAR'])
        self.assertTokensTypes("'123'", ['INT_CONST_CHAR'])
        self.assertTokensTypes("'1AB4'", ['INT_CONST_CHAR'])
        self.assertTokensTypes(r"'1A\n4'", ['INT_CONST_CHAR'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_special_names(self):
        self.assertTokensTypes('sizeof offsetof', ['SIZEOF', 'OFFSETOF'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes('0x0666e+3',
                               ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

    def test_hexadecimal_floating_constants(self):
        self.assertTokensTypes('0xDE.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0x.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0X12.P0', ['HEX_FLOAT_CONST'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\0'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

    def test_on_rbrace_lbrace(self):
        # Build a dedicated lexer whose brace callbacks record every
        # '{' / '}' seen, then check the recorded order.
        braces = []

        def on_lbrace():
            braces.append('{')

        def on_rbrace():
            braces.append('}')

        clex = CLexer(self.error_func, on_lbrace, on_rbrace,
                      self.type_lookup_func)
        clex.build(optimize=False)
        clex.input('hello { there } } and again }}{')
        token_list(clex)
        self.assertEqual(braces, ['{', '}', '}', '}', '}', '{'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        self.assertTokensTypes('"i am a string too \t"', ['STRING_LITERAL'])
        self.assertTokensTypes(r'''"esc\ape \"\'\? \0234 chars \rule"''',
                               ['STRING_LITERAL'])
        self.assertTokensTypes(r'''"hello 'joe' wanna give it a \"go\"?"''',
                               ['STRING_LITERAL'])
        self.assertTokensTypes(
            '"\123\123\123\123\123\123\123\123\123\123\123\123\123\123\123\123"',
            ['STRING_LITERAL'])
        # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
        # directives with Windows paths as filenames (..\..\dir\file)
        self.assertTokensTypes(r'"\x"', ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'"\a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z\A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z"',
            ['STRING_LITERAL'])
        self.assertTokensTypes(r'"C:\x\fa\x1e\xited"', ['STRING_LITERAL'])
        # The lexer is permissive and allows decimal escapes (not just octal)
        self.assertTokensTypes(r'"jx\9"', ['STRING_LITERAL'])
        self.assertTokensTypes(r'"fo\9999999"', ['STRING_LITERAL'])

    def test_mess(self):
        # Dense operator/punctuation soup with no whitespace separators.
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET', 'LBRACE', 'RBRACE', 'RBRACKET', 'LPAREN', 'RPAREN'])

        self.assertTokensTypes(r'()||!C&~Z?J', [
            'LPAREN', 'RPAREN', 'LOR', 'LNOT', 'ID', 'AND', 'NOT', 'ID',
            'CONDOP', 'ID'
        ])

        self.assertTokensTypes(r'+-*/%|||&&&^><>=<===!=', [
            'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', 'LOR', 'OR', 'LAND',
            'AND', 'XOR', 'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'
        ])

        self.assertTokensTypes(r'++--->?.,;:', [
            'PLUSPLUS', 'MINUSMINUS', 'ARROW', 'CONDOP', 'PERIOD', 'COMMA',
            'SEMI', 'COLON'
        ])

    def test_exprs(self):
        self.assertTokensTypes('bb-cc', ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes('foo & 0xFF', ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            [
                'LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID', 'RPAREN', 'TIMES',
                'INT_CONST_DEC'
            ],
        )

        self.assertTokensTypes('x | y >> z',
                               ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

        # Note: the literal '0' lexes as INT_CONST_OCT here.
        self.assertTokensTypes('x = y > 0 ? y : -6', [
            'ID', 'EQUALS', 'ID', 'GT', 'INT_CONST_OCT', 'CONDOP', 'ID',
            'COLON', 'MINUS', 'INT_CONST_DEC'
        ])

        self.assertTokensTypes('a+++b', ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes('for (int i = 0; i < n; ++i)', [
            'FOR', 'LPAREN', 'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
            'ID', 'LT', 'ID', 'SEMI', 'PLUSPLUS', 'ID', 'RPAREN'
        ])

        self.assertTokensTypes('self: goto self;',
                               ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
                {
                    case TYPE_ID:
                        m = 5;
                        break;
                    default:
                        m = 8;
                }""", [
                'SWITCH', 'LPAREN', 'ID', 'RPAREN', 'LBRACE', 'CASE', 'ID',
                'COLON', 'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI', 'BREAK',
                'SEMI', 'DEFAULT', 'COLON', 'ID', 'EQUALS', 'INT_CONST_DEC',
                'SEMI', 'RBRACE'
            ])

    def test_preprocessor_line(self):
        # #line directives must rewrite both the reported line number
        # and the lexer's current filename.
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

        str = r"""
        546
        #line 66 "kwas\df.h"
        id 4
        dsf
        # 9
        armo
        #line 10 "..\~..\test.h"
        tok1
        #line 99999 "include/me.h"
        tok2
        """

        self.clex.input(str)
        self.clex.reset_lineno()

        # '546' is on physical line 2 (the literal opens with a newline).
        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        # Skip '4' and 'dsf'; the third token is 'armo', renumbered by
        # the bare '# 9' directive.
        for i in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'ID')
        self.assertEqual(t4.value, 'tok1')
        self.assertEqual(t4.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\~..\test.h')

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'ID')
        self.assertEqual(t5.value, 'tok2')
        self.assertEqual(t5.lineno, 99999)
        self.assertEqual(self.clex.filename, r'include/me.h')

    def test_preprocessor_line_funny(self):
        # Windows path containing the escape-looking sequence '\6'.
        str = r'''
        #line 10 "..\6\joe.h"
        10
        '''
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\6\joe.h')

    def test_preprocessor_pragma(self):
        str = '''
        42
        #pragma
        #pragma helo me
        #pragma once
        # pragma omp parallel private(th_id)
        #\tpragma {pack: 2, smack: 3}
        #pragma <includeme.h> "nowit.h"
        #pragma "string"
        #pragma somestring="some_other_string"
        #pragma id 124124 and numbers 0235495
        59
        '''
        # Check that pragmas are tokenized, including trailing string
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')

        # Bare '#pragma' produces only the PPPRAGMA token (no string).
        t2 = self.clex.token()
        self.assertEqual(t2.type, 'PPPRAGMA')

        t3 = self.clex.token()
        self.assertEqual(t3.type, 'PPPRAGMA')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'PPPRAGMASTR')
        self.assertEqual(t4.value, 'helo me')

        # Skip '#pragma once' (PPPRAGMA + PPPRAGMASTR) and the next
        # PPPRAGMA so t5 is the '# pragma omp ...' string payload.
        for i in range(3):
            t = self.clex.token()

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'PPPRAGMASTR')
        self.assertEqual(t5.value, 'omp parallel private(th_id)')

        # The remaining five pragma lines each yield a PPPRAGMA token
        # followed by a PPPRAGMASTR payload.
        for i in range(5):
            ta = self.clex.token()
            self.assertEqual(ta.type, 'PPPRAGMA')
            tb = self.clex.token()
            self.assertEqual(tb.type, 'PPPRAGMASTR')

        # '59' sits on physical line 12 of the input literal.
        t6 = self.clex.token()
        self.assertEqual(t6.type, 'INT_CONST_DEC')
        self.assertEqual(t6.lineno, 12)
Example #13
0
 def setUp(self):
     """Build a fresh, non-optimized lexer and clear the saved error."""
     self.clex = CLexer(self.error_func, self.type_lookup_func)
     self.clex.build(optimize=False)
     self.error = ""
Example #14
0
class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. Therefore, the error_func passed to the lexer
        raises an exception.
    """
    def error_func(self, msg, line, column):
        # Any lexer error immediately fails the running test.
        self.fail(msg)

    def type_lookup_func(self, typ):
        # Report names starting with 'mytype' as typedef names so the
        # TYPEID-vs-ID distinction can be tested.
        if typ.startswith('mytype'):
            return True
        else:
            return False

    def setUp(self):
        # Fresh, non-optimized lexer for each test. NOTE(review): this
        # CLexer variant takes only the error and type-lookup callbacks.
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, str, types):
        # Lex the entire input and compare only the token type names.
        # NOTE: the first parameter shadows the builtin 'str'.
        self.clex.input(str)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('i ^= 1;',
                               ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872LL', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872ull', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes('0x0666e+3',
                               ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

    def test_hexadecimal_floating_constants(self):
        self.assertTokensTypes('0xDE.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0x.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0X12.P0', ['HEX_FLOAT_CONST'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        self.assertTokensTypes('"i am a string too \t"', ['STRING_LITERAL'])
        self.assertTokensTypes(r'''"esc\ape \"\'\? \0234 chars \rule"''',
                               ['STRING_LITERAL'])
        self.assertTokensTypes(r'''"hello 'joe' wanna give it a \"go\"?"''',
                               ['STRING_LITERAL'])

    def test_mess(self):
        # Dense operator/punctuation soup with no whitespace separators.
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET', 'LBRACE', 'RBRACE', 'RBRACKET', 'LPAREN', 'RPAREN'])

        self.assertTokensTypes(r'()||!C&~Z?J', [
            'LPAREN', 'RPAREN', 'LOR', 'LNOT', 'ID', 'AND', 'NOT', 'ID',
            'CONDOP', 'ID'
        ])

        self.assertTokensTypes(r'+-*/%|||&&&^><>=<===!=', [
            'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', 'LOR', 'OR', 'LAND',
            'AND', 'XOR', 'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'
        ])

        self.assertTokensTypes(r'++--->?.,;:', [
            'PLUSPLUS', 'MINUSMINUS', 'ARROW', 'CONDOP', 'PERIOD', 'COMMA',
            'SEMI', 'COLON'
        ])

    def test_exprs(self):
        self.assertTokensTypes('bb-cc', ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes('foo & 0xFF', ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            [
                'LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID', 'RPAREN', 'TIMES',
                'INT_CONST_DEC'
            ],
        )

        self.assertTokensTypes('x | y >> z',
                               ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

        # Note: the literal '0' lexes as INT_CONST_OCT here.
        self.assertTokensTypes('x = y > 0 ? y : -6', [
            'ID', 'EQUALS', 'ID', 'GT', 'INT_CONST_OCT', 'CONDOP', 'ID',
            'COLON', 'MINUS', 'INT_CONST_DEC'
        ])

        self.assertTokensTypes('a+++b', ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes('for (int i = 0; i < n; ++i)', [
            'FOR', 'LPAREN', 'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
            'ID', 'LT', 'ID', 'SEMI', 'PLUSPLUS', 'ID', 'RPAREN'
        ])

        self.assertTokensTypes('self: goto self;',
                               ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
                {
                    case TYPE_ID:
                        m = 5;
                        break;
                    default:
                        m = 8;
                }""", [
                'SWITCH', 'LPAREN', 'ID', 'RPAREN', 'LBRACE', 'CASE', 'ID',
                'COLON', 'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI', 'BREAK',
                'SEMI', 'DEFAULT', 'COLON', 'ID', 'EQUALS', 'INT_CONST_DEC',
                'SEMI', 'RBRACE'
            ])

    def test_preprocessor_line(self):
        # #line directives must rewrite both the reported line number
        # and the lexer's current filename.
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

        str = r"""
        546
        #line 66 "kwas\df.h" 
        id 4
        dsf
        # 9 
        armo
        #line 10 "..\~..\test.h"
        tok1
        #line 99999 "include/me.h"
        tok2
        """

        self.clex.input(str)
        self.clex.reset_lineno()

        # '546' is on physical line 2 (the literal opens with a newline).
        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        # Skip '4' and 'dsf'; the third token is 'armo', renumbered by
        # the bare '# 9' directive.
        for i in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'ID')
        self.assertEqual(t4.value, 'tok1')
        self.assertEqual(t4.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\~..\test.h')

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'ID')
        self.assertEqual(t5.value, 'tok2')
        self.assertEqual(t5.lineno, 99999)
        self.assertEqual(self.clex.filename, r'include/me.h')

    def test_preprocessor_line_funny(self):
        # Windows path containing the escape-looking sequence '\6'.
        str = r'''
        #line 10 "..\6\joe.h"
        10
        '''
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\6\joe.h')

    def test_preprocessor_pragma(self):
        str = r'''
        42
        #pragma helo me
        #pragma once
        # pragma omp parallel private(th_id)
        #pragma {pack: 2, smack: 3}
        #pragma <includeme.h> "nowit.h"
        #pragma "string"
        #pragma id 124124 and numbers 0235495
        59
        '''

        # Check that pragmas are ignored but the line number advances
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        t2 = self.clex.token()
        self.assertEqual(t2.type, 'INT_CONST_DEC')
        # '59' sits on physical line 10 of the input literal.
        self.assertEqual(t2.lineno, 10)
Example #15
0
 def __init__(self, *args, **kwargs):
     """Pass-through constructor; all arguments go to CLexerBase."""
     CLexerBase.__init__(self, *args, **kwargs)
Example #16
0
 def __init__(self):
     """Create and build an optimized CLexer.

     The same bound method ``self._callback`` is used for all four
     lexer hooks (error, lbrace, rbrace, type lookup); the generated
     lexer table is written as 'pycparser.lextab' in outputdir ''.
     """
     self.lexer = CLexer(self._callback, self._callback, self._callback,
                         self._callback)
     self.lexer.build(optimize=True,
                      lextab='pycparser.lextab',
                      outputdir='')
Example #17
0
 def setUp(self):
     """Build a fresh, non-optimized lexer; plain lambdas serve as the
     lbrace/rbrace callbacks."""
     self.clex = CLexer(self.error_func, lambda: None, lambda: None,
                        self.type_lookup_func)
     self.clex.build(optimize=False)
Example #18
0
sys.path.append(os.path.join(os.getcwd(), 'asm'))
import asm as asm
import json
from PyQt5.QtCore import QTimer

from PyQt5.QtWebEngineWidgets import *

import signal

from pycparser.c_lexer import CLexer

def _lex_error_func(msg, line, column):
  """Lexer error callback that deliberately ignores all errors.

  Always returns None.  The original body contained an unreachable
  ``raise ParseError(...)`` after the ``return`` (and ``ParseError``
  is not defined in this module); that dead statement is removed.
  """
  return

# Module-level lexer shared by this script: errors are swallowed by
# _lex_error_func, the brace callbacks are no-ops, and the type-lookup
# callback always reports False (no typedef names).
clex = CLexer(_lex_error_func, lambda:None, lambda:None, lambda _:False)
clex.build(optimize=True)

def sub_proc(fn, q, args):
    """Run fn(*args) with SIGINT handling and stdout captured.

    Installs a SIGINT handler that prints a message to stderr and
    exits, then redirects sys.stdout into a StringIO before invoking
    fn.  Exceptions from fn are printed (into the redirected stdout)
    rather than propagated.

    NOTE(review): in the code visible here sys.stdout is never
    restored and neither ``str_out`` nor ``q`` is used afterwards --
    the function appears truncated; confirm against the full source.
    """
    def myexit(signum, frame):
        # SIGINT handler: report and terminate this subprocess.
        print('terminated(sub)',file=sys.stderr)
        exit()
    signal.signal(signal.SIGINT, myexit)

    # Capture everything fn prints.
    str_out = StringIO()
    _stdout = sys.stdout
    sys.stdout = str_out
    try:
      fn(*args)
    except Exception as ex:
      print(ex)
Example #19
0
 def setUp(self):
     self.clex = CLexer(self.error_func, self.on_lbrace_func,
             self.on_rbrace_func, self.type_lookup_func)
     self.clex.build(optimize=False)
     self.error = ""
Example #20
0
 def __init__(self) -> None:
     self.lexer = CLexer(self._error_func, self._brace_func,
                         self._brace_func, self._type_lookup_func)
     self.lexer.build(optimize=True, lextab='pycparser.lextab')