def multi_line_comment(char_stream, location):
    def _values(char_stream):
        while peek_or_terminal(char_stream) is not terminal:
            current_value = consume(char_stream)
            yield current_value
            # we have consumed a star; check whether the adjacent value is a forward slash, if so consume it and break
            if current_value == TOKENS.STAR and peek_or_terminal(char_stream) == TOKENS.FORWARD_SLASH:
                yield consume(char_stream)
                break
    _comment = ''.join(_values(char_stream))
    if _comment.endswith(TOKENS.END_OF_MULTI_LINE_COMMENT):
        return MULTI_LINE_COMMENT(_comment, location)
    raise_error('{l} Could not locate end of multi-line comment.'.format(l=location))

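# A minimal, self-contained sketch of the same scan-until-`*/` technique over a
# plain character iterable, assuming nothing from this module; `_scan_comment`
# and its behavior are hypothetical illustrations, not project API.
def _scan_comment(chars):
    out, prev = [], ''
    for c in iter(chars):
        out.append(c)
        if prev == '*' and c == '/':  # the closing `*/` has just been consumed
            return ''.join(out)
        prev = c
    raise SyntaxError('Could not locate end of multi-line comment.')

assert _scan_comment('* comment */ rest') == '* comment */'
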
def declarations(tokens, symbol_table):
    # storage_class_specifier? type_name? init_declarator_list (';' or compound_statement)  # declaration
    storage_class_specifier, specifier_qualifier_list, statement = imap(
        symbol_table.__getitem__,
        ('__ storage_class_specifier __', '__ specifier_qualifier_list __', '__ statement __')
    )
    storage_class = storage_class_specifier(tokens, symbol_table)
    base_type = specifier_qualifier_list(tokens, symbol_table)
    expecting_token = TOKENS.SEMICOLON
    if peek_or_terminal(tokens) == TOKENS.SEMICOLON:
        yield EmptyDeclaration(loc(consume(tokens)), storage_class)
    elif peek_or_terminal(tokens) is terminal:
        raise_error('{l} Expected TOKENS.COMMA TOKENS.EQUAL TOKENS.SEMICOLON TOKENS.LEFT_BRACE got `{got}`'.format(
            l=loc(peek(tokens, EOFLocation)), got=peek(tokens, '')
        ))
    else:
        for dec in init_declarator_list(tokens, symbol_table, base_type=base_type, storage_class=storage_class):
            dec.storage_class = storage_class
            if isinstance(storage_class, TypeDef):  # init_declarator_list adds the symbol as a decl to symbol_table
                symbol_table[name(dec)] = (symbol_table.pop(name(dec)) or 1) and c_type(dec)  # replace dec by ctype
            elif peek_or_terminal(tokens) == TOKENS.LEFT_BRACE and not error_if_not_type(c_type(dec), FunctionType):
                symbol_table = push(symbol_table)
                symbol_table.update(chain(
                    imap(
                        lambda a: (name(a), a),  # add non-variable-list parameters to the symbol table ...
                        ifilterfalse(lambda c: isinstance(c_type(c), VAListType), c_type(dec))
                    ),
                    (('__ RETURN_TYPE __', c_type(c_type(dec))), ('__ LABELS __', SymbolTable()))
                ))
                yield FunctionDefinition(dec, next(statement(tokens, symbol_table)))
                expecting_token = (pop(symbol_table) or 1) and ''
            else:
                yield dec
                expecting_token = TOKENS.SEMICOLON
    _ = expecting_token and error_if_not_value(tokens, expecting_token)

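# Side note on the `(symbol_table.pop(name(dec)) or 1) and c_type(dec)` idiom above:
# `(expr or 1) and value` evaluates `expr` purely for its side effect (here,
# popping the old symbol) and then always yields `value`, even when `expr` is
# falsy. A minimal sketch (hypothetical helper name):
def _side_effect_then(value, side_effect_result):
    return (side_effect_result or 1) and value

assert _side_effect_then('new', None) == 'new'   # falsy side-effect result
assert _side_effect_then('new', 'old') == 'new'  # truthy side-effect result
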
def replace_instr(old_instr, new_instr):
    if id(old_instr) == id(new_instr):  # instructions are identical, do nothing ...
        return
    _ = id(old_instr) in new_instructions and raise_error(  # we are replacing an old instruction twice!!
        'We are replacing an old instruction {i} with {n} twice!'.format(i=old_instr, n=new_instr))
    if id(old_instr) in old_instructions:  # we are replacing a previously new instruction ...
        # replacing new_instr, so we need to replace previous references with this new_instr
        # instead of the old one ... get all the instructions that were referencing the older instruction ...
        previous_instructions = old_instructions[id(old_instr)]
        new_instructions.update(izip(imap(id, previous_instructions), repeat(new_instr)))  # update all previous
        old_instructions[id(new_instr)] = previous_instructions  # in case this new instruction should be updated again
        deleted_instructions.append(old_instr)
    else:
        new_instructions[id(old_instr)] = new_instr
        old_instructions[id(new_instr)].append(old_instr)  # a new instruction may replace more than 1 instruction.
    return new_instr

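# Standalone sketch of the bookkeeping above, assuming the module-level tables
# are a plain dict plus a defaultdict(list); all names here are hypothetical
# stand-ins, and the deleted-instruction tracking is omitted for brevity.
from collections import defaultdict

_new = {}                 # id(original) -> current replacement
_old = defaultdict(list)  # id(replacement) -> originals it replaced

def _replace(old, new):
    if id(old) in _old:   # `old` was itself a replacement: re-point its originals
        originals = _old.pop(id(old))
        _new.update((id(o), new) for o in originals)
        _old[id(new)] = originals
    else:
        _new[id(old)] = new
        _old[id(new)].append(old)
    return new

_a, _b, _c = object(), object(), object()
_replace(_a, _b)  # a -> b
_replace(_b, _c)  # b -> c, so a must now map to c as well
assert _new[id(_a)] is _c
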
def external_declaration(tokens, symbol_table):
    # storage_class_specifier? type_specifier init_declarator_list (';' or compound_statement)
    for decl in declarations(tokens, symbol_table):
        _ = decl and isinstance(decl.storage_class, (Auto, Register)) and raise_error(
            '{l} declarations at file scope may not have {s} storage class'.format(l=loc(decl), s=decl.storage_class)
        )
        yield decl

def get_declaration_or_definition(decl, storage_class):
    _ = initialization(decl) and isinstance(storage_class, Extern) and raise_error(
        '{l} {ident} has both initialization expr and extern storage class'.format(l=loc(decl), ident=name(decl)))
    if isinstance(c_type(decl), (FunctionType, StructType)) and not name(decl) or isinstance(storage_class, Extern):
        return Declaration(name(decl), c_type(decl), loc(decl), storage_class)
    return Definition(name(decl), c_type(decl), initialization(decl), loc(decl), storage_class or Auto(loc(decl)))

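# Side note (general Python, not project-specific): the condition above relies
# on `and` binding tighter than `or`, i.e. it parses as
# (isinstance(...) and not name(decl)) or isinstance(storage_class, Extern).
assert (False and False or True) is True  # (False and False) or True
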
def convert_declaration_to_definition(decl):
    _ = isinstance(decl, FunctionDefinition) and raise_error(
        '{l} Nested function definitions are not allowed.'.format(l=loc(decl)))
    # non-function declarations without a storage class are set to auto
    if type(decl) is Declaration and not isinstance(c_type(decl), FunctionType) and decl.storage_class is not Extern:
        decl = Definition(  # all non-function declarations within compound statements are definitions ...
            name(decl), c_type(decl), EmptyExpression(c_type(decl), loc(decl)), loc(decl),
            decl.storage_class or Auto(loc(decl))
        )
    return decl

def symbol(char_stream, location):
    def _values(char_stream):
        value = ''
        while value + peek(char_stream) in TOKENS.non_keyword_symbols:
            current_value = consume(char_stream)
            value += current_value
            yield current_value
    value = ''.join(_values(char_stream))
    next_char = peek_or_terminal(char_stream)
    # if value is a single dot, check whether the next value is a digit (possible float) or another dot (ellipsis) ...
    if value == TOKENS.DOT and next_char is not terminal:
        if next_char in digits:  # check for float ...
            return FLOAT(value + number(char_stream), location)
        if next_char == TOKENS.DOT:  # check for ellipsis ...
            value += consume(char_stream)
            if peek_or_terminal(char_stream) == TOKENS.DOT:
                return SYMBOL(value + consume(char_stream), location)  # TOKENS.ELLIPSIS
            raise_error('{l} Unable to tokenize: `{t}`'.format(l=location, t=TOKENS.DOT + TOKENS.DOT))
    return SYMBOL(value, location)

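# Illustrative "maximal munch" sketch over plain strings: keep extending the
# symbol while the extended spelling is still a known symbol. The symbol set
# and `_munch` are hypothetical; note that `..` is not a symbol, which is
# exactly why the ellipsis has to be special-cased after the loop above.
_SYMBOLS = {'+', '++', '+=', '-', '->', '.', '...'}

def _munch(text):
    value = ''
    for ch in text:
        if value + ch not in _SYMBOLS:
            break
        value += ch
    return value

assert _munch('++x') == '++'
assert _munch('...') == '.'  # stops at `.` since `..` is not a symbol
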
def number_literal(char_stream, location):
    values, sfix = number(char_stream), suffix(char_stream)
    _ = sfix not in possible_numeric_suffix and raise_error(
        '{0} Invalid numeric suffix {1}'.format(location, sfix))
    if peek_or_terminal(char_stream) == TOKENS.DOT:
        return FLOAT(values + consume(char_stream) + number(char_stream), location)
    _token_type = INTEGER
    if any(imap(values.startswith, octal_prefix)) and values != digit(0):
        _token_type = OCTAL
    if any(imap(values.startswith, hexadecimal_prefix)):
        _token_type = HEXADECIMAL
    return _token_type(values, location, sfix)

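# Sketch of the prefix classification above with hypothetical names: the two
# `if` statements are deliberately ordered so hexadecimal wins over octal,
# since '0x'/'0X' literals also start with '0'.
def _classify(values):
    kind = 'INTEGER'
    if values.startswith('0') and values != '0':
        kind = 'OCTAL'
    if values.startswith(('0x', '0X')):
        kind = 'HEXADECIMAL'
    return kind

assert _classify('42') == 'INTEGER'
assert _classify('052') == 'OCTAL'
assert _classify('0x2A') == 'HEXADECIMAL'
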
def switch_statement(stmnt, symbol_table):
    _ = (not isinstance(c_type(exp(stmnt)), IntegralType)) and raise_error(
        '{l} Expected an integral type got {g}'.format(g=c_type(exp(stmnt)), l=loc(stmnt)))
    end_switch = Pass(loc(stmnt))
    stmnt.stack = deepcopy(symbol_table['__ stack __'])

    # if the switch is inside a loop, only update end_instruct, since continue jumps to the start of the loop
    # while break goes to the end of the switch
    def body(stmnt, symbol_table, end_switch):
        symbol_table = push(symbol_table)
        stack, statement = imap(symbol_table.__getitem__, ('__ stack __', '__ statement __'))
        symbol_table['__ break __'] = (end_switch, stack.stack_pointer)
        symbol_table['__ switch __'] = True
        allocation_table = []  # create an allocation table to update stack before jump in case of nested definitions
        switch_body_instrs = []
        cases = {'default': Offset(end_switch, loc(stmnt))}
        for instr in statement(stmnt.statement, symbol_table):
            if isinstance(getattr(instr, 'case', None), CaseStatement):
                start = Pass(loc(instr))
                allocation_table.append(chain(
                    (start,),
                    update_stack(stmnt.stack.stack_pointer, instr.case.stack.stack_pointer, loc(instr)),
                    relative_jump(Offset(instr, loc(instr)), loc(instr)),
                ))
                cases[error_if_not_type(exp(exp(instr.case)), (int, long, str))] = Offset(start, loc(instr))
                del instr.case
            switch_body_instrs.append(instr)
        max_switch_value = 2 ** (8 * size_arrays_as_pointers(c_type(exp(stmnt)))) - 1
        for instr in jump_table(loc(stmnt), cases, allocation_table, max_switch_value, switch_body_instrs):
            yield instr
        _ = pop(symbol_table)

    return chain(
        symbol_table['__ expression __'](exp(stmnt), symbol_table),
        body(stmnt, symbol_table, end_switch),
        (end_switch,)
    )

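# Conceptual sketch of the dispatch the emitted jump table implements: a switch
# lowers to a mapping from case value to jump target, with 'default' falling
# through to the end-of-switch label (all names here are hypothetical).
def _dispatch(cases, value, end_label):
    return cases.get(value, cases.get('default', end_label))

_cases = {0: 'case_0', 1: 'case_1', 'default': 'end_switch'}
assert _dispatch(_cases, 1, 'end_switch') == 'case_1'
assert _dispatch(_cases, 99, 'end_switch') == 'end_switch'
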
def update_default_value(desig_expr, default_values):
    ctype, desig, expr = c_type(default_values), designation(desig_expr), exp(desig_expr)
    if desig >= len(default_values):
        logger.warning('{0} Excess element {1} {2} in initializer, it will be ignored ...'.format(
            loc(desig_expr), desig, expr))
    else:
        _ = (not safe_type_coercion(c_type(expr), c_type(default_values[desig]))) and raise_error(
            '{l} Unable to coerce from {f} to {t}'.format(
                l=loc(expr), f=c_type(expr), t=c_type(default_values[desig])))
        update_func = update_composite_type_initializer \
            if isinstance(ctype, (StructType, ArrayType)) else update_scalar_type_initializer
        update_func(desig_expr, default_values)

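# Minimal sketch of the excess-element rule above: a designated write past the
# end of the default-value sequence is ignored (the real code also logs a
# warning); `_set_default` is a hypothetical stand-in.
def _set_default(defaults, index, value):
    if index >= len(defaults):
        return defaults  # excess element: ignored
    defaults[index] = value
    return defaults

assert _set_default([0, 0], 5, 1) == [0, 0]
assert _set_default([0, 0], 1, 7) == [0, 7]
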
def __new__(cls, designation):
    _ = error_if_not_type(designation, (int, long)) and designation < 0 and raise_error(
        '{l} array indices must be greater than or equal to 0 got {g}'.format(l=loc(designation), g=designation))
    return super(NumericalDesignation, cls).__new__(cls, designation)

def ERROR(token_seq, *_):
    t = peek(token_seq)
    raise_error('{l} error: {m}'.format(l=loc(t), m=' '.join(get_line(token_seq))))

def __new__(cls, values, location=LocationNotSet):
    _ = values not in TOKENS.pre_processing_directives and raise_error(
        '{l} Could not locate pre_processing directive {d}'.format(l=location, d=values))
    return super(PRE_PROCESSING_SYMBOL, cls).__new__(cls, values, location)

def body(self, location, tokens=(), macros=None):
    macros = macros or Macros()
    location = loc(error_if_not_value(tokens, TOKENS.LEFT_PARENTHESIS)) or location
    all_args = tuple(
        (args, tuple(expand_all(iter(args), macros)))
        for args in imap(tuple, arguments(tokens, self.arguments, location))
    )
    _ = len(all_args) != len(self.arguments) and raise_error(
        '{l} Macro function {f} requires {t} arguments but got {g}.'.format(
            f=self.name, t=len(self.arguments), g=len(all_args), l=location))

    def _get_original_arguments(token, arguments):
        return arguments.get(token, ((token,),))[0]

    def _get_expanded_arguments(token, arguments):
        return arguments.get(token, (None, (token,)))[1]

    def _token_body(body_tokens, args):
        for token in imap(consume, repeat(body_tokens)):
            if peek_or_terminal(body_tokens) == TOKENS.PP:  # if tokens to be merged use original args unexpanded
                for t in _get_original_arguments(token, args):
                    yield t
                yield consume(body_tokens)
                for t in _get_original_arguments(consume(body_tokens, IGNORE()), args):
                    yield t
            elif token != TOKENS.PP and token.startswith(TOKENS.NUMBER_SIGN):
                yield STRING(' '.join(imap(str, args.get(token[1:], ((token[1:],),))[0])), loc(token))
            else:
                for t in _get_expanded_arguments(token, args):  # get expansion if present otherwise token ...
                    yield t

    def _merge_tokens(tokens):
        for token in imap(consume, repeat(tokens)):
            if token == TOKENS.PP:
                token = IGNORE()
            while peek_or_terminal(tokens) == TOKENS.PP and consume(tokens):
                new_token_source = token + consume(tokens, IGNORE())
                new_tokens = tokenize(imap(
                    Str,
                    new_token_source,
                    imap(
                        Location,
                        repeat(loc(token).file_name, len(new_token_source)),
                        repeat(loc(token).line_number),
                        count(loc(token).column_number),
                    )
                ))
                token = next(new_tokens, IGNORE())
                terminal_token = next(new_tokens, terminal)
                if terminal_token is not terminal:
                    raise ValueError('{l} token pasting generated more than one token {t} {e}'.format(
                        l=loc(token), t=token, e=terminal_token))
            if token == TOKENS.PP:
                token = IGNORE()
            yield token

    return filter_out_empty_tokens(  # filter out all the empty/whitespace tokens
        _merge_tokens(_token_body(iter(self._body), dict(izip(self.arguments, all_args)))))

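# Standalone sketch of the two preprocessor operators handled above, over plain
# strings and with hypothetical helper names; the real code re-tokenizes the
# pasted spelling and checks that it forms exactly one token.
def _paste(left, right):     # a ## b  ->  'ab'
    return left + right

def _stringize(arg_tokens):  # #a  ->  '"..."'
    return '"%s"' % ' '.join(arg_tokens)

assert _paste('foo', 'bar') == 'foobar'
assert _stringize(['1', '+', '2']) == '"1 + 2"'
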
def __init__(self, start, end, expr, location=LocationNotSet):
    _ = (end - start) <= 0 and raise_error('{l} designated range {v} produces an empty sequence'.format(
        l=location, v=' ... '.join(imap(str, (start, end)))))
    super(RangeDesignatedExpression, self).__init__((start, end), expr, location)

def no_rule(char_stream, location):
    raise_error('{l} Unable to tokenize {c}'.format(l=location, c=peek(char_stream)))

def initialization(self, value):
    _ = value and c_type(self) and not safe_type_coercion(c_type(self), c_type(value)) and raise_error(
        '{l} Could not coerce types from {from_type} to {to_type}'.format(
            l=loc(self), from_type=c_type(value), to_type=c_type(self)))
    if isinstance(c_type(self), ArrayType):
        _ = error_if_not_type(value, (ConstantExpression, CompoundLiteral, Initializer, EmptyExpression))
        if c_type(self).length is None and isinstance(value, (Initializer, CompoundLiteral)):
            c_type(self).length = len(value)
    self._initialization = value

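# Sketch of the array-length inference above: an incomplete array type
# (length None) takes its length from the initializer, while a complete one
# keeps its declared length. `_ArrayType`/`_infer_length` are hypothetical.
class _ArrayType(object):
    def __init__(self, length=None):
        self.length = length

def _infer_length(ctype, initializer):
    if ctype.length is None:
        ctype.length = len(initializer)
    return ctype

assert _infer_length(_ArrayType(), [1, 2, 3]).length == 3
assert _infer_length(_ArrayType(2), [1, 2, 3]).length == 2
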