def enum_blocks_edges(instructions):
    """
    Return the basic blocks and edges found by statically parsing
    the given instructions.

    A block is closed as soon as the offset that follows the current
    instruction (``inst.offset_end + 1``) is listed by ``enumerate_xref``.
    The edges leaving a closed block depend on the instruction closing it:
    unconditional branch, conditional branch, halt (no edge) or plain
    fall-through.

    :param instructions: instructions to split, in program order
    :return: tuple ``(basicblocks, edges)``; edges are de-duplicated
        through a set, so their order is not significant
    """
    basicblocks = list()
    edges = list()

    # nothing to parse: there is no block to create or to finalize
    if not instructions:
        return (basicblocks, edges)

    xrefs = enumerate_xref(instructions)

    # ``block`` is None whenever the next instruction must open a new block
    block = None

    for inst in instructions:
        if block is None:
            block = BasicBlock(start_offset=inst.offset,
                               start_instr=inst,
                               name='block_%x' % inst.offset)

        # add current instruction to the basicblock
        block.instructions.append(inst)

        # the block only ends when the next offset is a cross-reference
        next_offset = inst.offset_end + 1
        if next_offset not in xrefs:
            continue

        # absolute JUMP
        if inst.is_branch_unconditional:
            edges.append(Edge(block.name,
                              'block_%x' % xref_of_instr(inst),
                              EDGE_UNCONDITIONAL))
        # conditional JUMPI / JUMPIF / ...
        elif inst.is_branch_conditional:
            edges.append(Edge(block.name,
                              'block_%x' % xref_of_instr(inst),
                              EDGE_CONDITIONAL_TRUE))
            edges.append(Edge(block.name,
                              'block_%x' % next_offset,
                              EDGE_CONDITIONAL_FALSE))
        # halt instruction (RETURN, STOP, RET, ...): no outgoing edge
        elif inst.is_halt:
            pass
        # just falls to the next instruction
        else:
            edges.append(Edge(block.name,
                              'block_%x' % next_offset,
                              EDGE_FALLTHROUGH))

        block.end_offset = inst.offset_end
        block.end_instr = inst
        basicblocks.append(block)
        block = None

    # Add the last block only when it is still open. Appending it
    # unconditionally would list it twice whenever the final instruction
    # had already closed it, and would leave its end fields unset otherwise.
    if block is not None:
        last_inst = block.instructions[-1]
        block.end_offset = last_inst.offset_end
        block.end_instr = last_inst
        basicblocks.append(block)

    edges = list(set(edges))

    return (basicblocks, edges)
def get_functions_call_edges(self, format_fname=False):
    """
    Build the nodes and edges of the function call graph.

    ``self.analyzer`` and ``self.functions`` are created from
    ``self.module_bytecode`` when they are not set yet.

    :param format_fname: when true, every name is rendered with
        ``format_func_name(name, params, returns)``; otherwise the raw
        prototype name is used
    :return: tuple ``(nodes, edges)`` where ``nodes`` holds one name per
        entry of ``self.analyzer.func_prototypes`` and ``edges`` holds one
        ``Edge(from, to, EDGE_CALL)`` per pair returned by
        ``enum_func_call_edges``
    """
    if not self.analyzer:
        self.analyzer = WasmModuleAnalyzer(self.module_bytecode)
    if not self.functions:
        self.functions = enum_func(self.module_bytecode)

    prototypes = self.analyzer.func_prototypes

    def display_name(prototype):
        # single place deciding how a prototype is shown, so nodes and
        # both edge endpoints can never disagree
        name, param_str, return_str, _ = prototype
        if format_fname:
            return format_func_name(name, param_str, return_str)
        return name

    # create nodes
    nodes = [display_name(prototype) for prototype in prototypes]
    logging.info('nodes: %s', nodes)

    # create edges
    # call_pairs = [(node_from, node_to), (...), ...] - indexes in prototypes
    call_pairs = enum_func_call_edges(self.functions,
                                      len(self.analyzer.imports_func))
    edges = [Edge(display_name(prototypes[node_from]),
                  display_name(prototypes[node_to]),
                  EDGE_CALL)
             for node_from, node_to in call_pairs]
    logging.info('edges: %s', edges)

    return (nodes, edges)
def _scope_end_target(scopes, start_offset):
    """
    Return the offset following the first recorded scope that starts at
    ``start_offset``.

    The lookup is deliberately unguarded: when no scope starts at that
    offset a TypeError is raised (subscripting None).
    """
    scope = next((s for s in scopes if s[1] == start_offset), None)
    return scope[2] + 1


def enum_blocks_edges(function_id, instructions):
    """
    Return the basic blocks and edges of one function body after
    statically parsing its instructions.

    The work is done in four passes:

    1. record every structured scope as ``(depth, start, end, name)``,
       because branch labels are relative to the nesting depth;
    2. resolve branch labels (and the implicit targets of ``if``/``else``)
       into offsets, stored both on ``inst.xref`` and in a lookup set;
    3. cut the instruction stream into basic blocks;
    4. emit the edges leaving every block.

    :param function_id: forwarded to ``format_bb_name`` for every block name
    :param instructions: instructions of the function body, in order; the
        last one closes the function scope
    :return: tuple ``(basicblocks, edges)``; edges are de-duplicated
        through a set, so their order is not significant
    """
    basicblocks = list()
    edges = list()

    # an empty body has no final instruction closing the function scope
    if not instructions:
        return basicblocks, edges

    branches = []
    # only used for membership tests, so a set avoids a linear scan per
    # instruction (it may contain None for unknown targets)
    xrefs = set()

    depth = 0
    open_scopes = []
    scopes = []

    # pass 1 - we need this because jump labels are relative to the
    # current block index
    for inst in instructions[:-1]:
        if inst.is_block_terminator:
            start, name = open_scopes.pop()
            if inst.name == 'else':
                # the scope closed by an 'else' stops right before it
                end = inst.offset - 1
            else:
                end = inst.offset_end
            scopes.append((depth, start, end, name))
            depth -= 1
        if inst.is_block_starter:
            open_scopes.append((inst.offset, inst.name))
            depth += 1
        if inst.is_branch:
            branches.append((depth, inst))

    # add function body end
    scopes.append((0, 0, instructions[-1].offset_end, 'func'))
    scopes.sort(key=lambda scope: (scope[1], scope[0]))

    # pass 2a - explicit labels (br, br_if, br_table, ...)
    for branch_depth, inst in branches:
        if inst.name == 'br_table':
            labels = list(inst.insn_byte[2:])
        else:
            labels = [int(inst.operand_interpretation.split(' ')[-1])]

        for label in labels:
            scope = next((s for s in scopes
                          if s[0] == (branch_depth - label)
                          and s[1] < inst.offset
                          and s[2] > inst.offset_end), None)
            if scope is None:
                continue
            _, start, end, name = scope
            # branching to a 'loop' label goes to the entry of the loop
            if name == 'loop':
                value = start
            # branching to a 'block' label goes to the end of the block
            elif name == 'block' or name == 'func':
                value = end
            # we don't know
            else:
                value = None
            inst.xref.append(value)
            xrefs.add(value)

    # pass 2b - 'if' and 'else' do not carry a label: their target is the
    # offset right after the scope they open
    for inst in instructions[:-1]:
        if inst.name in ('if', 'else'):
            jump_target = _scope_end_target(scopes, inst.offset)
            inst.xref.append(jump_target)
            xrefs.add(jump_target)

    # pass 3 - enumerate blocks
    last_index = len(instructions) - 1
    last_offset = instructions[-1].offset
    # position in ``instructions`` of every block's last instruction,
    # parallel to ``basicblocks``
    block_end_indexes = []
    block = None

    for index, inst in enumerate(instructions):
        # creation of a block
        if block is None:
            block = BasicBlock(inst.offset,
                               inst,
                               name=format_bb_name(function_id, inst.offset))

        # add current instruction to the basicblock
        block.instructions.append(inst)

        following = instructions[index + 1] if index < last_index else None
        closes_block = bool(
            # next instruction is a jump target
            (following is not None and following.offset in xrefs)
            # absolute jump - br
            or inst.is_branch_unconditional
            # conditional jump - br_if
            or inst.is_branch_conditional
            # GRAPHICAL OPTIMIZATION: consecutive 'end' are merged, a block
            # is only cut in front of 'else' and 'loop'
            or (following is not None and following.name in ('else', 'loop'))
            # last instruction of the bytecode
            or inst.offset == last_offset
        )

        if closes_block:
            block.end_offset = inst.offset_end
            block.end_instr = inst
            basicblocks.append(block)
            block_end_indexes.append(index)
            block = None

    # pass 4 - enumerate edges
    for position, block in enumerate(basicblocks):
        # get the last instruction
        inst = block.end_instr
        end_index = block_end_indexes[position]

        # unconditional jump - br
        if inst.is_branch_unconditional:
            for ref in inst.xref:
                edges.append(Edge(block.name,
                                  format_bb_name(function_id, ref),
                                  EDGE_UNCONDITIONAL))
        # conditional jump - br_if, if
        elif inst.is_branch_conditional:
            if inst.name == 'if':
                edges.append(Edge(block.name,
                                  format_bb_name(function_id,
                                                 inst.offset_end + 1),
                                  EDGE_CONDITIONAL_TRUE))
                jump_target = _scope_end_target(scopes, inst.offset)
                edges.append(Edge(block.name,
                                  format_bb_name(function_id, jump_target),
                                  EDGE_CONDITIONAL_FALSE))
            else:
                for ref in inst.xref:
                    if ref and ref != inst.offset_end + 1:
                        # create conditional true edges
                        edges.append(Edge(block.name,
                                          format_bb_name(function_id, ref),
                                          EDGE_CONDITIONAL_TRUE))
                # create conditional false edge
                edges.append(Edge(block.name,
                                  format_bb_name(function_id,
                                                 inst.offset_end + 1),
                                  EDGE_CONDITIONAL_FALSE))
        # a halting instruction anywhere in the block ends the flow; this
        # also covers the block's own last instruction
        elif any(i.is_halt for i in block.instructions):
            pass
        # handle the case when 'if' body and 'else' follow each other: the
        # end of the 'if' body skips over the whole 'else' scope
        elif inst.offset != last_offset and \
                block.start_instr.name != 'else' and \
                instructions[end_index + 1].name == 'else':
            else_ins = instructions[end_index + 1]
            edges.append(Edge(block.name,
                              format_bb_name(
                                  function_id,
                                  _scope_end_target(scopes, else_ins.offset)),
                              EDGE_FALLTHROUGH))
        # every other block but the last one falls through
        elif inst.offset != last_offset:
            edges.append(Edge(block.name,
                              format_bb_name(function_id,
                                             inst.offset_end + 1),
                              EDGE_FALLTHROUGH))

    # prevent duplicate edges
    edges = list(set(edges))
    return basicblocks, edges
def emulate(self, state=None, depth=0):
    """
    Emulate instructions starting at ``state.pc`` until one of them halts
    the current branch.

    Every executed instruction is appended to the current function, the
    state seen before executing it is stored in ``self.states`` and a deep
    copy of that state is used for the execution itself.

    :param state: VM state to start from; a fresh ``EvmVMstate()`` is
        created when omitted. Its ``symbolic_stack`` is overwritten.
    :param depth: forwarded unchanged to ``emulate_one_instruction``
    :return: None; results are accumulated on ``self``
    """
    # A default instance in the signature would be built once at definition
    # time and then shared - and mutated - by every call relying on it.
    if state is None:
        state = EvmVMstate()

    # create fake stack for tests
    state.symbolic_stack = list(range(1000))

    # get current instruction
    instr = self.reverse_instructions[state.pc]

    # create the first basicblock of this branch
    self.current_basicblock = self.basicblock_per_instr[instr.offset]

    # beginning of a function
    if instr in self.functions_start_instr:

        # cleaning duplicate block in previous function
        self.current_function.basicblocks = list(
            set(self.current_function.basicblocks))

        # retrieve matching function
        self.current_function = next(
            func for func in self.functions if func.start_instr == instr)

        # one placeholder per argument: the name used to be passed without
        # a matching '%s', which makes the logging call fail to format
        logging.info("[+] Entering function - %x: %s",
                     self.current_function.start_offset,
                     self.current_function.prefered_name)

    # associate function to basicblock
    # TODO: create list of function_name
    self.current_basicblock.function_name = \
        self.current_function.prefered_name
    # associate basicblock to function
    self.current_function.basicblocks.append(self.current_basicblock)

    # halt variable used to catch the end of the branch
    halt = False
    while not halt:

        # get current instruction
        instr = self.reverse_instructions[state.pc]

        # handle fall-through due to JUMPDEST: reaching a JUMPDEST that
        # does not start the current block means we fell into a new block
        if instr.name == 'JUMPDEST' and \
                self.current_basicblock.start_offset != instr.offset:
            self.edges.append(Edge(self.current_basicblock.name,
                                   'block_%x' % instr.offset,
                                   EDGE_FALLTHROUGH))

        # get current basicblock
        self.current_basicblock = self.basicblock_per_instr[instr.offset]
        self.current_function.basicblocks.append(self.current_basicblock)

        # add this instruction to its function
        # TODO: verify if it's not useless for ethereum
        self.current_function.instructions.append(instr)

        # save instruction and state, then keep working on a copy so the
        # stored snapshot is not altered by the execution
        state.instr = instr
        self.states[self.states_total] = state
        state = copy.deepcopy(state)
        self.states_total += 1
        state.pc += 1

        # execute single instruction
        halt = self.emulate_one_instruction(instr, state, depth)
        state.instructions_visited.append(instr.offset)

    logging.info("[X] Returning from basicblock %s",
                 self.current_basicblock.name)

    # automatically remove duplicated edges
    self.edges = list(set(self.edges))
def ssa_stack_memory_storage_flow_instruction(self, instr, state, depth):
    """
    Build the SSA form of a stack / memory / storage / flow instruction
    and follow the branches created by JUMP and JUMPI.

    Handled opcodes: POP, MLOAD, SLOAD, MSTORE, MSTORE8, SSTORE, JUMP,
    JUMPI, PC, MSIZE, GAS and JUMPDEST. Any other opcode is left untouched.

    :param instr: instruction being emulated; its ``ssa`` attribute is set
    :param state: VM state whose ``ssa_stack`` is consumed / extended
    :param depth: current recursion depth, incremented for every branch
        followed through ``self.emulate``
    :return: True when the emulation of the current branch must stop
    """

    def resolve_jump_target(push_instr):
        # Return (jump_addr, target). ``target`` is None when the jump
        # cannot be followed: unresolved dynamic address, no instruction at
        # that address, or a destination that is not a JUMPDEST. The reason
        # is logged here so both JUMP and JUMPI report it the same way.
        if push_instr.ssa.is_constant:
            jump_addr = push_instr.operand_interpretation
        else:
            # try to resolve the SSA repr
            jump_addr = self.simplify_ssa.resolve_instr_ssa(push_instr)
            # Must be checked before looking the target up: an unresolved
            # address matches no instruction, and the lookup used to raise
            # StopIteration before this report could ever be reached.
            if not jump_addr:
                logging.warning('JUMP DYNAMIC')
                logging.warning('[X] push_instr %x: %s ' %
                                (push_instr.offset, push_instr.name))
                logging.warning('[X] push_instr.ssa %s' %
                                push_instr.ssa.format())
                list_args = [arg.ssa.format()
                             for arg in push_instr.ssa.args]
                logging.warning('[X] push_instr.ssa %s' % list_args)
                return jump_addr, None

        # get instruction with this value as offset
        target = next((element for element in self.instructions
                       if element.offset == jump_addr), None)
        if target is None or target.name != "JUMPDEST":
            logging.info('[X] Bad JUMP to 0x%x' % jump_addr)
            return jump_addr, None
        return jump_addr, target

    halt = False
    op = instr.name

    if op == 'POP':
        # SSA STACK - the popped value is simply discarded
        state.ssa_stack.pop()
        instr.ssa = SSA(method_name=instr.name)

    elif op in ['MLOAD', 'SLOAD']:
        # SSA STACK
        s0 = state.ssa_stack.pop()
        instr.ssa = SSA(new_assignement=self.ssa_counter,
                        method_name=instr.name, args=[s0])
        state.ssa_stack.append(instr)
        self.ssa_counter += 1

    elif op in ['MSTORE', 'MSTORE8', 'SSTORE']:
        # SSA STACK
        s0, s1 = state.ssa_stack.pop(), state.ssa_stack.pop()
        instr.ssa = SSA(method_name=instr.name, args=[s0, s1])

    elif op == 'JUMP':
        # SSA STACK
        push_instr = state.ssa_stack.pop()
        instr.ssa = SSA(method_name=instr.name, args=[push_instr])

        jump_addr, target = resolve_jump_target(push_instr)
        if target is None:
            return True

        if target.offset not in state.instructions_visited:
            logging.info('[X] follow JUMP branch offset 0x%x' %
                         target.offset)
            new_state = copy.deepcopy(state)
            new_state.pc = self.instructions.index(target)

            # follow the JUMP
            self.edges.append(Edge(self.current_basicblock.name,
                                   'block_%x' % target.offset,
                                   EDGE_UNCONDITIONAL))
            self.emulate(new_state, depth=depth + 1)
        else:
            # target already visited on this path: record the edge only
            self.edges.append(Edge(self.current_basicblock.name,
                                   'block_%x' % target.offset,
                                   EDGE_UNCONDITIONAL))
            logging.info('[X] Loop detected, skipping JUMP 0x%x' %
                         jump_addr)

        # nothing is executed after an unconditional jump
        halt = True
        # the nested emulation moved the current block: restore it
        self.current_basicblock = self.basicblock_per_instr[instr.offset]

    elif op == 'JUMPI':
        # SSA STACK
        push_instr, condition = \
            state.ssa_stack.pop(), state.ssa_stack.pop()
        instr.ssa = SSA(method_name=instr.name,
                        args=[push_instr, condition])

        # the default (condition false) branch is always followed first
        logging.info('[X] follow JUMPI default branch offset 0x%x' %
                     (instr.offset_end + 1))
        new_state = copy.deepcopy(state)
        self.edges.append(Edge(self.current_basicblock.name,
                               'block_%x' % (instr.offset_end + 1),
                               EDGE_CONDITIONAL_FALSE))
        self.emulate(new_state, depth=depth + 1)
        self.current_basicblock = self.basicblock_per_instr[instr.offset]

        jump_addr, target = resolve_jump_target(push_instr)
        if target is None:
            return True

        if target.offset not in state.instructions_visited:
            # condition is True
            logging.info('[X] follow JUMPI branch offset 0x%x' %
                         (target.offset))
            new_state = copy.deepcopy(state)
            new_state.pc = self.instructions.index(target)

            # follow the JUMPI
            self.edges.append(Edge(self.current_basicblock.name,
                                   'block_%x' % target.offset,
                                   EDGE_CONDITIONAL_TRUE))
            self.emulate(new_state, depth=depth + 1)
        else:
            self.edges.append(Edge(self.current_basicblock.name,
                                   'block_%x' % target.offset,
                                   EDGE_CONDITIONAL_TRUE))
            logging.warning('[X] Loop detected, skipping JUMPI 0x%x' %
                            jump_addr)
            logging.warning('[X] push_instr.ssa %s' %
                            push_instr.ssa.format())

        # both branches have been handled by nested emulations
        halt = True

    elif op in ['PC', 'MSIZE', 'GAS']:
        # SSA STACK
        instr.ssa = SSA(new_assignement=self.ssa_counter,
                        method_name=instr.name)
        state.ssa_stack.append(instr)
        self.ssa_counter += 1

    elif op == 'JUMPDEST':
        # SSA STACK
        instr.ssa = SSA(method_name=instr.name)

    return halt
def enum_blocks_edges(function_id, instructions):
    """
    Return a list of basicblock after
    statically parsing given instructions

    The function records the structured scopes of the body, resolves the
    label of every branch into an offset, cuts the instructions into basic
    blocks and finally emits the edges leaving each block.

    :param function_id: passed to format_bb_name to build every block name
    :param instructions: instructions of one function body, in order; the
        last one is excluded from the scope scan and closes the 'func' scope
    :return: tuple (basicblocks, edges); edges are de-duplicated through a
        set, so their order is not significant
    """

    basicblocks = list()
    edges = list()

    branches = []
    xrefs = []

    # 'intent' is the current nesting depth of the structured scopes
    intent = 0
    blocks_tmp = []
    blocks_list = []
    # remove last instruction that is 'end' for the function
    #tt = instructions[:-1]

    for index, inst in enumerate(instructions[:-1]):

        if inst.is_block_terminator:
            # close the innermost open scope: (depth, start, end, name)
            start, name = blocks_tmp.pop()
            blocks_list.append((intent, start, inst.offset_end, name))
            intent -= 1
        if inst.is_block_starter:  # in ['block', 'loop']:
            blocks_tmp.append((inst.offset, inst.name))
            intent += 1
        if inst.is_branch:
            # remember the depth at which the branch was met
            branches.append((intent, inst))

    # add function body end
    blocks_list.append((0, 0, instructions[-1].offset_end, 'func'))
    blocks_list = sorted(blocks_list, key=lambda tup: tup[1])

    for depth, inst in branches:
        # the label is read from the last word of the operand text
        d2 = int(inst.operand_interpretation.split(' ')[-1])
        # first scope at depth (depth - d2) strictly enclosing the branch
        rep = next(((i, s, e, n) for i, s, e, n in blocks_list if (
            i == (depth - d2) and s < inst.offset and e > inst.offset_end)), None)
        if rep:
            i, start, end, name = rep
            # a branch to a 'loop' goes to its start
            if name == 'loop':
                value = start
            # else name == 'block'
            # a branch to a 'block' (or the function itself) goes to its end
            elif name == 'block' or name == 'func':
                value = end
            else:
                value = None
            inst.xref = value
            xrefs.append(value)

    # remove "block" instruction - not useful graphically
    # instructions = [x for x in instructions if x.name not in ['block', 'loop']]

    # enumerate blocks

    new_block = True

    for index, inst in enumerate(instructions):

        # creation of a block
        if new_block:
            block = BasicBlock(inst.offset,
                               inst,
                               name=format_bb_name(function_id, inst.offset))
            new_block = False

        # add current instruction to the basicblock
        block.instructions.append(inst)

        # next instruction is a jump target
        if index < (len(instructions) - 1) and \
                instructions[index + 1].offset in xrefs:
            new_block = True
        # absolute jump - br
        elif inst.is_branch_unconditional:
            new_block = True
        # conditional jump - br_if
        elif inst.is_branch_conditional:
            new_block = True
        # end of a block
        elif index < (len(instructions) - 1) and \
                inst.name in ['end']:  # is_block_terminator
            new_block = True
        elif index < (len(instructions) - 1) and \
                instructions[index + 1].name == 'else':  # is_block_terminator
            new_block = True
        # start of a block
        elif index < (len(instructions) - 1) and \
                instructions[index + 1].is_block_starter:
            new_block = True
        # last instruction of the bytecode
        elif inst.offset == instructions[-1].offset:
            new_block = True

        if new_block:
            # the current instruction is the last one of the block
            block.end_offset = inst.offset_end
            block.end_instr = inst
            basicblocks.append(block)
            new_block = True

    # TODO: detect and remove end instruction that end loop

    # enumerate edges
    for index, block in enumerate(basicblocks):
        # get the last instruction
        inst = block.end_instr
        # unconditional jump - br
        if inst.is_branch_unconditional:
            if inst.xref is not None:
                edges.append(
                    Edge(block.name,
                         format_bb_name(function_id, inst.xref),
                         EDGE_UNCONDITIONAL))
            else:
                log.error('Bad branch target')
        # conditional jump - br_if, if
        elif inst.is_branch_conditional:
            if inst.name == 'if':
                # true branch: the instruction right after the 'if'
                edges.append(
                    Edge(block.name,
                         format_bb_name(function_id, inst.offset_end + 1),
                         EDGE_CONDITIONAL_TRUE))
                # NOTE(review): the false branch is taken to be the block
                # two positions further in basicblocks; this presumably
                # assumes the 'if' body is exactly one block and raises
                # IndexError near the end of the list - confirm.
                edges.append(
                    Edge(
                        block.name,
                        format_bb_name(
                            function_id,
                            basicblocks[index + 2].start_instr.offset),
                        EDGE_CONDITIONAL_FALSE))
            else:
                if inst.xref is not None:
                    edges.append(
                        Edge(block.name,
                             format_bb_name(function_id, inst.xref),
                             EDGE_CONDITIONAL_TRUE))
                else:
                    log.error('Bad branch target')
                # false branch: the instruction right after the branch
                edges.append(
                    Edge(block.name,
                         format_bb_name(function_id, inst.offset_end + 1),
                         EDGE_CONDITIONAL_FALSE))
        elif inst.offset != instructions[-1].offset:
            # EDGE_FALLTHROUGH
            edges.append(
                Edge(block.name,
                     format_bb_name(function_id, inst.offset_end + 1),
                     EDGE_FALLTHROUGH))

    # prevent duplicate edges
    edges = list(set(edges))
    return basicblocks, edges