def test_basecases(self):
    """Each single keyword, operator, identifier or number lexes to one token."""
    cases = [
        ("if", lex.LexToken("if")),
        ("else", lex.LexToken("else")),
        ("int", lex.LexToken("int")),
        ("void", lex.LexToken("void")),
        ("return", lex.LexToken("return")),
        ("while", lex.LexToken("while")),
        ("+", lex.LexToken("+")),
        ("-", lex.LexToken("-")),
        ("abc", lex.LexToken("ID", "abc")),
        ("12", lex.LexToken("NUM", 12)),
    ]
    for source, expected in cases:
        self.assertEqual(lex.lex(source), [expected])
def newToken(newType, lineno):
    """Build a synthetic, valueless LexToken of *newType* tagged with *lineno*."""
    token = lex.LexToken()
    token.type = newType
    token.value = None
    token.lineno = lineno
    token.lexpos = -100  # sentinel: token was synthesized, not read from input
    return token
def make_token(type, lineno):
    """Return a valueless LexToken of the given *type* at *lineno* (lexpos -1)."""
    token = lex.LexToken()
    token.type, token.value = type, None
    token.lineno, token.lexpos = lineno, -1  # -1 marks a position-less token
    return token
def newToken(newType, lineno):
    """Synthesize a LexToken of *newType* carrying no value, located at *lineno*."""
    t = lex.LexToken()
    t.type = newType
    t.value = None
    t.lineno = lineno
    # lexpos -100 flags the token as generated rather than scanned from source.
    t.lexpos = -100
    return t
def create_token(type, value, production=None):
    """Create a token of *type* and *value*.

    When *production* is supplied, the token takes the line number and
    filename of the symbol in slot 1 of that reduced production.  Leave
    *production* unset for built-in tokens, which get lineno -1 and the
    filename '<builtin>'.
    """
    token = lex.LexToken()
    token.type = type
    token.value = value
    token.lexpos = -1
    if production:
        origin = production.slice[1]
        token.lineno = origin.lineno
        token.filename = origin.filename
    else:
        token.lineno = -1
        token.filename = '<builtin>'
    return token
def add_gcc_macros(self):
    """Predefine the object macros gcc itself would define on this
    machine/platform combination (lists taken from `gcc -E -dD empty.c`)."""
    import platform
    import sys

    gcc_macros = (
        '__GLIBC_HAVE_LONG_LONG',
        '__GNUC__',
    )

    machine_macros = {
        'x86_64': ('__amd64', '__amd64__', '__x86_64', '__x86_64__',
                   '__tune_k8__', '__MMX__', '__SSE__', '__SSE2__',
                   '__SSE_MATH__', '__k8', '__k8__'),
        'Power Macintosh': ('_ARCH_PPC', '__BIG_ENDIAN__', '_BIG_ENDIAN',
                            '__ppc__', '__POWERPC__'),
        # TODO everyone else.
    }.get(platform.machine(), ())

    # All three historical Linux sys.platform values share one macro set.
    linux_macros = ('__gnu_linux__', '__linux', '__linux__', 'linux',
                    '__unix', '__unix__', 'unix')
    platform_macros = {
        'linux': linux_macros,
        'linux2': linux_macros,
        'linux3': linux_macros,
        'darwin': ('__MACH__', '__APPLE__', '__DYNAMIC__', '__APPLE_CC__'),
        'win32': ('_WIN32', ),
        # TODO everyone else
    }.get(sys.platform, ())

    # Every predefined macro simply expands to the single PP_NUMBER token `1`.
    one = lex.LexToken()
    one.type = 'PP_NUMBER'
    one.value = '1'
    one.lineno = -1
    one.lexpos = -1
    for macro in machine_macros + platform_macros + gcc_macros:
        self.define_object(macro, (one, ))

    # gcc extension keywords the parser should simply swallow.
    self.define_object('inline', ())
    self.define_object('__inline', ())
    self.define_object('__inline__', ())
    self.define_object('__const', (create_token('IDENTIFIER', 'const'), ))
def apply_macros(self, tokens, replacing=None):
    """Expand object- and function-style macros in *tokens* and return
    the new token list.

    *replacing* names the macro currently being expanded so that a
    macro which mentions its own name is not expanded recursively.
    Occurrences of the preprocessor ``defined`` operator (either
    ``defined NAME`` or ``defined(NAME)``) are replaced by a single
    PP_NUMBER token with value '0' or '1'.
    """
    repl = []
    i = 0
    while i < len(tokens):
        token = tokens[i]
        if token.type == 'IDENTIFIER' and token.value in self.objects:
            # Object-like macro: splice in its (recursively expanded)
            # replacement list, unless we are already expanding it.
            r = self.objects[token.value]
            if token.value != replacing and r:
                repl += self.apply_macros(r, token.value)
        elif token.type == 'IDENTIFIER' and \
            token.value in self.functions and \
            len(tokens) - i > 2 and \
            tokens[i+1].value == '(':
            r, numargs = self.functions[token.value]
            # Collect the argument token lists, balancing parentheses
            # inside each argument; extra commas past numargs stay in
            # the last argument (varargs-style).
            i += 2
            params = [[]]
            parens = 0
            while i < len(tokens):
                if tokens[i].value == ',' and parens == 0 and \
                   len(params) < numargs:
                    params.append([])
                elif tokens[i].value == ')' and parens == 0:
                    break
                else:
                    if tokens[i].value == '(':
                        parens += 1
                    elif tokens[i].value == ')':
                        parens -= 1
                    params[-1].append(tokens[i])
                i += 1
            if token.value != replacing and r:
                # Parameters are stored in the replacement list as int
                # indices; substitute the collected argument tokens,
                # then expand the result.
                newr = []
                for t in r:
                    if type(t) == int:
                        newr += params[t]
                    else:
                        newr.append(t)
                repl += self.apply_macros(newr, token.value)
        elif token.type == 'DEFINED':
            if len(tokens) - i > 3 and \
               tokens[i + 1].type in ('(', 'LPAREN') and \
               tokens[i + 2].type == 'IDENTIFIER' and \
               tokens[i + 3].type in (')', 'RPAREN'):
                # Bug fix: the closing paren must accept 'RPAREN' as well,
                # mirroring the '(' / 'LPAREN' check on the opening paren.
                result = self.is_defined(tokens[i + 2].value)
                i += 3
            elif len(tokens) - i > 1 and \
                 tokens[i + 1].type == 'IDENTIFIER':
                result = self.is_defined(tokens[i + 1].value)
                i += 1
            else:
                # TODO proper diagnostics; was a Python-2-only `print >>`
                # statement, replaced with the Py2/Py3-portable equivalent.
                sys.stderr.write('Invalid use of "defined"\n')
                result = 0
            t = lex.LexToken()
            t.value = str(int(result))
            t.type = 'PP_NUMBER'
            t.lexpos = token.lexpos
            t.lineno = token.lineno
            repl.append(t)
        else:
            repl.append(token)
        i += 1
    return repl
def test_longestmatch(self):
    """Words merely containing a keyword as a prefix must lex as a single ID."""
    for source in ("ifif", "voi", "elsea", "whilew"):
        self.assertEqual(lex.lex(source), [lex.LexToken("ID", source)])
def _new_token(type, lineno):
    """Create a valueless LexToken of *type* at *lineno* (lexpos left unset)."""
    token = lex.LexToken()
    token.type, token.value, token.lineno = type, None, lineno
    return token