def tokenize(self):
    """Convert the source into a token list.

    The result is cached in ``self.tokens``; repeated calls return the
    same list. Composite tokens are tokenized recursively at the end.

    Returns:
        A list of obtained tokens, recursively tokenized.
    """
    if self.tokens is not None:
        return self.tokens

    self.tokens = []
    reader = CodeReader(self.source, self.filename)

    while not reader.has_end():
        # discard garbage between tokens
        reader.sweep()

        # end of string after the sweep
        if reader.has_end():
            break

        if reader.has_identifier():
            # <identifier>
            self._tokenize_identifier(reader)
        elif reader.has_code_block():
            # {...stuff...}
            block = reader.consume_block()
            self._add(T_CodeBlock(block))
        elif reader.starts(';'):
            # ;
            self._collect_semicolon(reader)
        else:
            reader.error('Unexpected syntax here.')

    # recurse into composite tokens
    for tok in self.tokens:
        if tok.is_composite():
            tok.tokenize()

    return self.tokens
def _tokenize(self):
    """Parse the bracket contents as exactly one index expression.

    Strips the surrounding brackets, consumes a single expression and
    stores it both in ``self.tokens`` and as ``self.index``. Raises if
    anything is left over after the expression.
    """
    reader = CodeReader(self.value[1:-1].strip())
    reader.sweep()

    expr_source = reader.consume_code(end=',', eof=True, keep_end=False)
    expr = T_Expression(expr_source)
    self.tokens.append(expr)
    self.index = expr

    reader.sweep()

    # anything remaining means the index was not a single expression
    if not reader.has_end():
        raise Exception('Invalid array index (must be single expression).')
def _tokenize(self):
    """Parse expression sub-tokens.

    Walks the expression text and appends the recognized tokens to
    ``self.tokens``; composite sub-tokens are tokenized recursively at
    the end.

    Raises:
        Exception: on a character sequence that matches no token kind.
    """
    rd = CodeReader(self.value)

    while not rd.has_end():
        rd.sweep()

        if rd.has_identifier():
            # an identifier — can be a variable or a function call
            s = rd.consume_identifier()
            self.tokens.append(T_Name(s))

            rd.sweep()

            if rd.has_bracket():
                # array index
                s = rd.consume_block()
                self.tokens.append(T_Bracket(s))

            elif rd.has_paren():
                # paren with arguments for the function
                s = rd.consume_block()
                t = T_Paren(s)
                t.set_type(ParenType.ARGVALS)
                self.tokens.append(t)

        elif rd.has_paren():
            # parenthesised sub-expression
            s = rd.consume_block()
            t = T_Paren(s)
            t.set_type(ParenType.EXPR)
            self.tokens.append(t)

        elif rd.has_number():
            # number literal
            s = rd.consume_number()
            self.tokens.append(T_Number(s))

        elif (self.tokens
              and type(self.tokens[-1]) is T_Operator
              and rd.matches(r'[-+]\s*[0-9a-z_]+')):
            # Signed value right after an operator.
            # Rewrite "-x" as "-1 * x"; a unary "+" is simply dropped.
            sign = rd.consume()
            if sign == '+':
                sign = ''

            rd.sweep()

            if sign == '-':
                self.tokens.append(T_Number('-1'))
                self.tokens.append(T_Operator('*'))

        elif rd.has_operator():
            # operator
            s = rd.consume_operator()
            self.tokens.append(T_Operator(s))

        elif rd.has_char():
            # char literal
            s = rd.consume_char()
            self.tokens.append(T_Char(s))

        elif rd.has_string():
            # string literal
            s = rd.consume_string()
            self.tokens.append(T_String(s))

        else:
            # fixed: added the missing space before the context snippet
            raise Exception('Unexpected expression token near ' + rd.peek(10))

    # recurse into composite tokens
    for t in self.tokens:
        if t.is_composite():
            t.tokenize()
def apply_macros(self):
    """ Recursively apply macros to the output of `process()`

    To be called after `process()`. The `output` variable is
    overwritten by this. Recurses until a pass makes no replacement,
    so macros contained in macro expansions are resolved too.

    Returns:
        The final source code after applying all macro replacements.
    """
    if len(self.output) == 0:
        print('There is no source code.')
        return

    rd = CodeReader(self.output)

    applied_count = 0  # replacements made in this pass
    out = ''           # rebuilt source text

    while not rd.has_end():
        out += self._handle_whitespace(rd)

        if rd.has_end():
            break

        if rd.has_identifier():
            ident = rd.consume_identifier()
            # whitespace after the identifier is kept so unreplaced
            # text is emitted byte-for-byte
            ident_whitesp = rd.consume_inline_whitespace()

            if ident in self.defines:
                # all macros registered under this name
                macros = self.defines[ident]
                replacement = None

                if rd.has_bracket():
                    # array macro — index is the bracket text w/o brackets
                    bracket = rd.consume_block()[1:-1]

                    # first array-like macro that accepts the index wins
                    for mm in macros:
                        if mm.is_arraylike():
                            if mm.can_use_args([bracket]):
                                replacement = mm.generate([bracket])
                                break

                    if replacement is None:
                        # no match: emit the original text unchanged
                        out += ident + ident_whitesp
                        out += '[%s]' % bracket
                    else:
                        out += replacement
                        applied_count += 1

                elif rd.has_paren():
                    # func macro — tokenize the paren to split the args
                    paren = rd.consume_block()

                    t = T_Paren(paren)
                    t.set_type(ParenType.ARGVALS)
                    t.tokenize()

                    args = []
                    for a in t.tokens:
                        args.append(a.value)

                    # print(args)

                    # first function-like macro that accepts the args wins
                    for mm in macros:
                        if mm.is_functionlike():
                            if mm.can_use_args(args):
                                replacement = mm.generate(args)
                                break

                    if replacement is None:
                        # no match: emit original text and warn
                        out += ident + ident_whitesp + paren
                        print(
                            '[W] Macro "%s" defined, but can\'t use arguments (%s)'
                            % (ident, ', '.join(args) ))
                    else:
                        out += replacement
                        applied_count += 1

                else:
                    # const macro (no arguments)
                    for mm in macros:
                        if mm.can_use_args(None):
                            replacement = mm.generate(None)
                            break

                    if replacement is None:
                        out += ident + ident_whitesp
                    else:
                        out += replacement + ident_whitesp
                        applied_count += 1

            else:
                out += ident + ident_whitesp  # give it back

        # "...", and "sdgfsd""JOINED" "This too"
        elif rd.has_string():
            # handle string concatenation: adjacent literals are merged
            s = ''
            while rd.has_string():
                s += rd.consume_string()[1:-1]  # drop quotes
                rd.sweep()

            out += '"%s"' % s

        # //...
        elif rd.has_inline_comment():
            rd.consume_line()

        # /* ... */
        elif rd.has_block_comment():
            rd.consume_block_comment()

        # any char...
        else:
            out += rd.consume()

    self.output = out

    # take care of macros in macros
    if applied_count > 0:
        return self.apply_macros()
    else:
        return out
def _tokenize(self):
    """Parse expression sub-tokens.

    Walks the expression text and appends the recognized tokens to
    ``self.tokens``; composite sub-tokens are tokenized recursively at
    the end. A leading sign, or a sign right after an operator, is
    turned into the unary operator ``@-`` (unary ``+`` is dropped).

    Raises:
        Exception: on a character sequence that matches no token kind.
    """
    rd = CodeReader(self.value)

    while not rd.has_end():
        rd.sweep()

        if rd.has_identifier():
            # an identifier — can be a variable or a function call
            s = rd.consume_identifier()
            self.tokens.append(T_Name(s))

            rd.sweep()

            if rd.has_bracket():
                # array index
                s = rd.consume_block()
                self.tokens.append(T_Bracket(s))

            elif rd.has_paren():
                # paren with arguments for the function
                s = rd.consume_block()
                t = T_Paren(s)
                t.set_type(ParenType.ARGVALS)
                self.tokens.append(t)

        elif rd.has_paren():
            # parenthesised sub-expression
            s = rd.consume_block()
            t = T_Paren(s)
            t.set_type(ParenType.EXPR)
            self.tokens.append(t)

        elif rd.has_number():
            # number literal
            s = rd.consume_number()
            self.tokens.append(T_Number(s))

        elif ((not self.tokens or type(self.tokens[-1]) is T_Operator)
              and rd.matches(r'[-+]\s*[0-9a-z_]+')):
            # Unary operator: sign at the start of the expression or
            # right after another operator (simplified the original
            # boolean and replaced the `tokens[-1:][0]` idiom).
            sign = rd.consume()
            if sign == '+':
                sign = ''

            rd.sweep()

            if sign == '-':
                self.tokens.append(T_Operator('@-'))

        elif rd.has_operator():
            # operator
            s = rd.consume_operator()
            self.tokens.append(T_Operator(s))

        elif rd.has_char():
            # char literal
            s = rd.consume_char()
            self.tokens.append(T_Char(s))

        elif rd.has_string():
            # string literal
            s = rd.consume_string()
            self.tokens.append(T_String(s))

        else:
            # fixed: added the missing space before the context snippet
            raise Exception('Unexpected expression token near ' + rd.peek(10))

    # recurse into composite tokens
    for t in self.tokens:
        if t.is_composite():
            t.tokenize()