def __init__(self, verbosity='debug'):
    Logger.set_level(verbosity)
    self.builtins = [
        Print(), PrintLn(), Eq(), RefEq(), Len(), IsEmpty(), Add()
    ]
def generate_generic_code(self, ast: SPLFile, env: Env):
    Logger.info('-------------------------------------------------------------')
    Logger.info('----------------- Starting code generation ------------------')
    Logger.info('-------------------------------------------------------------')
    Logger.info('* Starting generation of intermediate code')
    generic_functions_code = GenericGenerator(ast, env, self.builtins).generate()
    Logger.info('- Generation of intermediate code DONE')
    return generic_functions_code
def compile(self, input_path: str, platform: str, out_path: str):
    ast = self.parse_input(input_path)
    env = self.analysis(ast)
    generic = self.generate_generic_code(ast, env)
    if platform == 'ssm':
        self.generate_ssm_code(generic, out_path)
    elif platform == 'x64':
        self.generate_x64_code(generic, out_path)
    else:
        Logger.error(f'Unknown target platform {platform}')
        sys.exit(1)
    print(f'Compilation of {input_path} to {out_path} succeeded')
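# A minimal usage sketch of the driver above; the enclosing class name
# 'Compiler' and the command-line handling below are assumptions for
# illustration, not taken from the source:
#
#   if __name__ == '__main__':
#       # e.g. python compile.py program.spl x64 build/program
#       source, target, output = sys.argv[1:4]
#       Compiler(verbosity='info').compile(source, target, output)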
def generate_x64_code(self, generic_functions_code: List, out_path: str):
    Logger.info('* Starting generation of x86_64 code')
    gen = X64Generator(generic_functions_code)
    gen.generate_x64_instructions()
    Logger.info('- Generation of x86_64 code DONE')
    Logger.info(f'* Writing x86_64 code to file {out_path}.asm')
    gen.write_to_file(out_path + '.asm')
    Logger.info('* Assembling and linking x86_64 code')
    gen.assemble_and_link(out_path + '.asm', out_path)
def generate_function_code(self, fun_instance: FunctionInstance, fun_impl: FunctionImpl):
    gen_type = 'entry point' if fun_instance.entry_point else 'function'
    Logger.debug(f'Generating code for {gen_type} \'{fun_instance.name}\' '
                 f'(instance: \'{fun_instance.create_identifier()}\')')
    if fun_instance.entry_point:
        self.declare_globals(fun_impl)

    insts = []

    # Determine how many 8-byte slots the local variables need by scanning
    # the local load/store opcodes for the highest offset used
    local_vars_size = 0
    for op in fun_impl.ops:
        if isinstance(op, (gen_codes.LdLoc, gen_codes.StLoc)):
            off = op.local.offset() + 1
            if off > local_vars_size:
                local_vars_size = off

    dm = X64DataManager(len(fun_instance.arg_types), local_vars_size)

    dm.add_push_instr(insts, operand.Direct(Reg.RBP))  # Save rbp
    insts.append(Instruction(Mnemonic.MOV, operand.Direct(Reg.RBP),
                             operand.Direct(Reg.RSP)))  # Use rbp for access to local vars

    # Store object ref to update stack size later
    stack_size = local_vars_size * 8 if (local_vars_size * 8) % 16 == 0 else (local_vars_size * 8) + 8
    stack_size_op = operand.Literal(stack_size)
    insts.append(Instruction(Mnemonic.SUB, operand.Direct(Reg.RSP), stack_size_op))  # Make room for local vars

    # Push callee preserved regs to stack to be able to restore at function end
    for reg in self.callee_preserved_regs:
        dm.add_push_instr(insts, operand.Direct(reg))

    for op in fun_impl.ops:
        self.map_generic_to_ssm(fun_instance, dm, op, insts)

    key = '_main' if fun_instance.entry_point else 'f_' + fun_instance.create_identifier()
    self.x64_code[key] = insts

    # Update stack size after knowing how much was used
    stack_usage = dm.max_stack_used * 8
    if stack_usage > stack_size_op.lit:
        stack_size_op.lit = stack_usage if stack_usage % 16 == 0 else stack_usage + 8
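# The SUB on rsp above rounds the local-variable area up so that rsp stays
# 16-byte aligned, as the System V AMD64 calling convention expects at call
# sites. A standalone sketch of that rounding rule, assuming 8-byte slots
# (the helper name 'align_stack_size' is hypothetical):
def align_stack_size(num_slots: int) -> int:
    size = num_slots * 8
    return size if size % 16 == 0 else size + 8

# align_stack_size(3) -> 32 (24 rounded up); align_stack_size(4) -> 32 (already aligned)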
def generate_ssm_code(self, generic_functions_code: List, out_path: str):
    Logger.info('* Starting generation of SSM code')
    ssm_generator = SSMGenerator(generic_functions_code)
    ssm_generator.generate()
    Logger.info('- Generation of SSM code DONE')
    Logger.info(f'* Writing SSM code to file: {out_path}.ssm')
    ssm_generator.to_file(out_path + '.ssm')
def parse_balanced_brackets(self, start: Token, f: Callable, end: Token):
    try:
        t1 = self.tr.require_type(start.token_type)
    except:
        self.errors.append(ExpectedSymbolError(start.value, self.tr.current))
        raise ParseError()

    try:
        result = f()
    except RecursionError:
        Logger.debug(f"Recursion error caught parsing from {start.value} to {end.value}")
        result = Error()
        self.errors.append(TooManyBracketsError(t1))
        self.tr.skip_to(end)
    except Exception as e:
        # If it's a block, try moving one context up, otherwise try to find the closing bracket
        Logger.debug(f"Exception caught parsing from {start.value} to {end.value}, "
                     f"skipping to closing bracket: {e}")
        result = Error()
        if end.token_type == TokenType.CURLY_CLOSE:
            self.tr.move_context_up()
            return result.with_code_range(
                CodeRange(t1.code_range.start, self.tr.current_code_range()))
        else:
            self.tr.skip_to(end)

    try:
        t2 = self.tr.require_type(end.token_type)
        # code_range = CodeRange(t1.code_range.start, t2.code_range.end)
        return result
    except:
        self.errors.append(
            UnbalancedBracketsError(start, end, t1.code_range, self.tr.current_code_range()))
        raise ParseError()
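# A hypothetical call-site sketch for the helper above, parsing a brace-
# delimited block. The Token constructor arguments and 'parse_block' are
# assumed names for illustration, not taken from the source:
#
#   block = self.parse_balanced_brackets(
#       Token(TokenType.CURLY_OPEN, '{'),
#       self.parse_block,
#       Token(TokenType.CURLY_CLOSE, '}'))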
def assemble_and_link(self, file_path: str, out_path: str):
    object_path = out_path + '.o'
    cp = subprocess.run([
        'nasm',
        '-g',             # Include debug info
        '-f', 'macho64',  # Mach-O 64-bit object file format
        '-o', object_path,
        file_path,
    ])
    if cp.returncode == 0:
        cp = subprocess.run([
            'ld',
            '/usr/lib/libSystem.dylib',  # Include C library
            object_path,                 # Object file
            '-o', out_path
        ])
        if cp.returncode == 0:
            Logger.info(f'Input file {file_path} assembled and linked to {out_path}')
        else:
            Logger.error('Error: linker returned non-zero status code')
    else:
        Logger.error('Error: assembler returned non-zero status code')
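# For reference, the subprocess calls above correspond to roughly these shell
# commands on macOS, with <out> standing in for out_path:
#
#   nasm -g -f macho64 -o <out>.o <out>.asm
#   ld /usr/lib/libSystem.dylib <out>.o -o <out>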
def infer_type(self, env: Env, sigma: InferenceType):
    Logger.debug(f'* Start typing function {self.name.value}')
    assert isinstance(self.arg_ids, list), 'Binding analysis must be done before type inference'
    if self.name.value == 'main':
        assert len(self.arg_ids) == 0, \
            f"Function 'main' cannot take arguments, but is defined with {len(self.arg_ids)}"

    env.add_fun(self.name.value, self.arg_ids)
    name = self.name.value
    f = env.functions.get(name)
    env.update_fun_quants(name, [])

    star = Subst.empty()
    if self.fun_type is not None:
        args_len = len(f.usage.arg_types)
        types_len = len(self.fun_type.args.args)
        if args_len != types_len:
            raise FunArgsTypesMismatch(self.code_range, name, args_len, types_len)
        for arg_tv, arg_type_def in zip(f.usage.arg_types, self.fun_type.args.args):
            star = arg_type_def.infer_type(env, arg_tv).compose(star)
            env.substitute(star)
        star = self.fun_type.return_type.infer_type(env, f.usage.return_type).compose(star)
        env.substitute(star)

    star = self.block.infer_type(env, f.usage.return_type.substitute(star)).compose(star)
    f = env.functions.get(name)

    # Update the quantifiers
    type_vars = []
    for arg_tv in f.usage.arg_types:
        type_vars = arg_tv.substitute(star).collect_type_vars(type_vars)
    Logger.debug(f'TVs in resulting function type before removing free TVs= {f.usage}: {type_vars}')

    # Remove free variables in env from the TVs we're going to quantify over
    free_env_type_vars = env.free_type_vars(lambda fun_name: fun_name != name)
    Logger.debug(f'Free TVs in env: {free_env_type_vars}')
    type_vars = [x for x in type_vars if x not in free_env_type_vars]
    Logger.debug(f'TVs after removing free TVs: {type_vars}')
    env.update_fun_quants(name, type_vars)
    f = env.functions.get(name)

    postponed = env.postponed_functions.pop(name, None)
    if postponed is not None:
        for (instance_type, inst_code_range) in postponed:
            ft = f.instantiate(env)
            args_len = len(instance_type.arg_types)
            tv_len = len(ft.usage.arg_types)
            if tv_len != args_len:
                raise FunCallArgsMismatch(self.code_range, self.name.value, args_len, tv_len)
            for actual, instance in zip(ft.usage.arg_types, instance_type.arg_types):
                Logger.debug(f'Postponed function signature check: {actual} <-> {instance}')
                star = actual.unify_or_type_error(instance.substitute(star), inst_code_range).compose(star)
                env.substitute(star)
            star = instance_type.return_type.substitute(star)\
                .unify_or_type_error(ft.usage.return_type.substitute(star), inst_code_range)
            env.substitute(star)

    Logger.debug(f'- Finished typing function {name}\n')
    return star
def analysis(self, ast: SPLFile):
    Logger.info('-------------------------------------------------------------')
    Logger.info('------------------ Starting analysis phase ------------------')
    Logger.info('-------------------------------------------------------------')

    Logger.info('* Starting return value checking')
    rvc = ReturnValueChecker()
    return_warnings, return_errors = rvc.check_spl_file(ast)
    Logger.info('- Return value checking DONE')
    if len(return_warnings) > 0:
        for w in return_warnings:
            Logger.warning(w)
    if len(return_errors) > 0:
        for e in return_errors:
            Logger.error(e)
        sys.exit(1)

    context = Context()
    for b in self.builtins:
        b.add_to_context(context)
    Logger.info(f'- Added {len(self.builtins)} builtin functions to binding context: {self.get_builtin_str()}')

    binding_feedback = {'errors': [], 'warnings': []}
    Logger.info('* Starting binding analysis')
    ast.binding_analysis(context, binding_feedback)
    Logger.info('- Binding analysis DONE')
    Logger.info('*** Pretty printing AST with identifier IDs after binding analysis: ***')
    Logger.info('\n' + ast.indented_print())
    if len(binding_feedback['warnings']) > 0:
        for w in binding_feedback['warnings']:
            Logger.warning(w)
    if len(binding_feedback['errors']) > 0:
        for e in binding_feedback['errors']:
            Logger.error(e)
        sys.exit(1)

    env = Env()
    for b in self.builtins:
        b.add_to_env(env)
    Logger.info(f'- Added {len(self.builtins)} builtin functions to type environment: {self.get_builtin_str()}')

    subst = Subst.empty()
    Logger.info('* Starting type inference')
    try:
        subst = ast.infer_type(env, InferenceVoid())
    except Exception as e:
        Logger.error(str(e))
        # raise e
        sys.exit(1)
    env.substitute(subst)

    Logger.debug('* Inferred function types after inference:')
    for name, f in env.functions.items():
        Logger.debug(f'- {name} :: args: [{", ".join(str(a) for a in f.usage.arg_types)}], '
                     f'ret: {str(f.usage.return_type)}')
    Logger.debug('* Inferred variable types after inference:')
    for num, v in env.variables.items():
        Logger.debug(f'- {num} :: {str(v)}')
    Logger.info('- Typing DONE')
    return env
def parse_input(self, path: str):
    with open(path, 'r') as f:
        InputHandler.set_input_text(f.read())

    Logger.info('-------------------------------------------------------------')
    Logger.info('------------------- Starting parsing phase ------------------')
    Logger.info('-------------------------------------------------------------')

    lexer = Lexer()
    Logger.info('* Starting lexing')
    tokens = lexer.lex_input()
    Logger.info('- Lexing DONE')
    Logger.debug('*** Printing lexed tokens: ***')
    for i, t in enumerate(tokens):
        Logger.debug('{token_type}::{value}'.format(token_type=t.token_type, value=t.value))
    if len(lexer.lex_errors) > 0:
        for e in lexer.lex_errors:
            Logger.error(e)
        sys.exit(1)

    tr = TokenReader(tokens)
    parser = Parser(tr)
    Logger.info('* Starting parsing')
    ast = parser.parse_spl()
    Logger.info('- Parsing DONE')
    Logger.info('*** Pretty printing AST: ***')
    Logger.info('\n' + ast.indented_print())
    if len(parser.errors) > 0:
        for e in parser.errors:
            Logger.error(e)
        sys.exit(1)
    return ast