def get_graph(self, addr):
    """Build the graph of the instructions reachable from ``addr``.

    Instructions are obtained with ``self.lazy_disasm`` and inserted into a
    new ``Graph``.  Addresses that still have to be visited are kept in a
    LIFO work list; the walk stops when that list is empty.

    Returns the ``Graph`` object.
    """
    from capstone import CS_OP_IMM

    ARCH_UTILS = self.load_arch_module().utils

    curr = self.lazy_disasm(addr)
    gph = Graph(self, addr)
    rest = []

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring an elapsed duration.
    start = time.perf_counter()

    while True:
        if not gph.exists(curr):
            if ARCH_UTILS.is_uncond_jump(curr) and len(curr.operands) > 0:
                if curr.operands[0].type == CS_OP_IMM:
                    target = curr.operands[0].value.imm
                    nxt = self.lazy_disasm(target)
                    gph.set_next(curr, nxt)
                    rest.append(nxt.address)
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.add_node(curr)
                gph.uncond_jumps_set.add(curr.address)

            elif ARCH_UTILS.is_cond_jump(curr) and len(curr.operands) > 0:
                if curr.operands[0].type == CS_OP_IMM:
                    nxt_jump = self.lazy_disasm(curr.operands[0].value.imm)
                    direct_nxt = self.lazy_disasm(curr.address + curr.size)
                    gph.set_cond_next(curr, nxt_jump, direct_nxt)
                    rest.append(nxt_jump.address)
                    rest.append(direct_nxt.address)
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.add_node(curr)
                gph.cond_jumps_set.add(curr.address)

            elif ARCH_UTILS.is_ret(curr):
                gph.add_node(curr)

            else:
                try:
                    nxt = self.lazy_disasm(curr.address + curr.size)
                    gph.set_next(curr, nxt)
                    rest.append(nxt.address)
                except Exception:
                    # Best effort: the following instruction could not be
                    # linked, so keep the current one as a plain node.
                    # (Narrowed from a bare except so that
                    # KeyboardInterrupt/SystemExit are not swallowed.)
                    gph.add_node(curr)

        try:
            curr = self.lazy_disasm(rest.pop())
        except IndexError:
            # Work list exhausted.
            break

    if self.binary.type == T_BIN_PE:
        self.binary.pe_reverse_stripped_symbols(self)

    elapsed = time.perf_counter() - start
    debug__("Graph built in %fs" % elapsed)

    return gph
def generate_ast(ctx__, paths):
    """Generate the AST for ``paths`` and run the registered AST passes.

    The module-level ``ctx`` is set to ``ctx__``.  The tree is built by
    ``get_ast_branch``, then every callable in ``ctx.libarch.registered`` is
    applied to it, and colors are assigned when ``ctx.color`` is true.

    Returns the AST.
    """
    global ctx
    ctx = ctx__

    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()

    ast = get_ast_branch(ctx, paths)

    elapsed = time.perf_counter() - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time.perf_counter()

    for func in ctx.libarch.registered:
        func(ctx, ast)

    elapsed = time.perf_counter() - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast
def __init__(self, mem, filename, raw_type=None, raw_base=None,
             raw_big_endian=None):
    """Open ``filename`` with the loader matching its format.

    When ``raw_type`` is given the file is handed to the raw loader and no
    format detection is done.  Otherwise ``load_magic`` sets ``self.type``
    and the ELF or PE loader is instantiated.

    Raises ExcFileFormat when the detected type is neither ELF nor PE.
    """
    self.__binary = None
    self.reverse_symbols = {}  # ad -> name
    self.symbols = {}  # name -> ad
    self.section_names = {}
    self.type = None

    self._abs_sections = {}  # start section -> SectionAbs
    self._sorted_sections = []  # bisect list, contains section start address

    # Identity test: the idiomatic way to compare against None.
    if raw_type is not None:
        import lib.fileformat.raw as LIB_RAW
        self.__binary = LIB_RAW.Raw(self, filename, raw_type,
                                    raw_base, raw_big_endian)
        self.type = T_BIN_RAW
        return

    start = time()
    self.load_magic(filename)

    if self.type == T_BIN_ELF:
        import lib.fileformat.elf as LIB_ELF
        self.__binary = LIB_ELF.ELF(mem, self, filename)
    elif self.type == T_BIN_PE:
        import lib.fileformat.pe as LIB_PE
        self.__binary = LIB_PE.PE(mem, self, filename)
    else:
        raise ExcFileFormat()

    elapsed = time() - start
    debug__("Binary loaded in %fs" % elapsed)
def __loop_detection(self, ctx, entry):
    """Detect the loops reachable from ``entry`` and fill the loop tables.

    After the exploration, false loops are removed from ``loops_all``, then
    ``infinite_loop``, ``loops_start`` and ``last_loop_node`` are populated.
    ``ctx`` is not used by this function.
    """
    # Named timer_start (not "start") so that the loop variables below
    # cannot overwrite it: the original reused "start" as a loop variable
    # and then subtracted a loop address from the end time.
    # time.clock() was also removed in Python 3.8.
    timer_start = time.perf_counter()

    self.__explore(entry, set(), set(), {}, None)
    self.__search_equiv_loops()
    self.__search_false_loops()

    for k in self.false_loops:
        del self.loops_all[k]

    # Search infinite loops
    self.infinite_loop = set()
    for l_curr_loop, l_set in self.loops_all.items():
        if self.__is_inf_loop(l_set):
            self.infinite_loop.add(l_curr_loop)

    # Save first address of loops
    for _, l_start in self.loops_all:
        self.loops_start.add(l_start)

    # Search the last node which forces looping
    for (l_prev_loop, l_start), l_set in self.loops_all.items():
        self.last_loop_node[(l_prev_loop, l_start)] = set()
        self.__search_last_loop_node(set(), l_prev_loop, l_start, l_set)

    elapsed = time.perf_counter() - timer_start
    debug__("Exploration: found %d loop in %fs" %
            (len(self.loops_all), elapsed))
def __init__(self, filename, raw_type=None, raw_base=None,
             raw_big_endian=None):
    """Open ``filename`` with the loader matching its format.

    When ``raw_type`` is given the file is handed to the raw loader and no
    format detection is done.  Otherwise ``load_magic`` sets ``self.type``
    and the ELF or PE loader is instantiated.

    Raises ExcFileFormat when the detected type is neither ELF nor PE.
    """
    self.__binary = None
    self.reverse_symbols = {}
    self.symbols = {}
    self.type = None

    # Identity test: the idiomatic way to compare against None.
    if raw_type is not None:
        import lib.fileformat.raw as LIB_RAW
        self.__binary = LIB_RAW.Raw(filename, raw_type,
                                    raw_base, raw_big_endian)
        self.type = T_BIN_RAW
        return

    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()
    self.load_magic(filename)

    if self.type == T_BIN_ELF:
        import lib.fileformat.elf as LIB_ELF
        self.__binary = LIB_ELF.ELF(self, filename)
    elif self.type == T_BIN_PE:
        import lib.fileformat.pe as LIB_PE
        self.__binary = LIB_PE.PE(self, filename)
    else:
        raise ExcFileFormat()

    elapsed = time.perf_counter() - start
    debug__("Binary loaded in %fs" % elapsed)
def loop_detection(self, entry, bypass_false_search=False):
    """Run the loop analysis of the graph starting at ``entry``.

    All loop tables are reset first.  If the graph has more than MAX_NODES
    nodes the analysis is skipped: ``skipped_loops_analysis`` is set and the
    function returns right after the reset.

    ``bypass_false_search`` disables the false-loop search.
    """
    t_begin = time()

    # Equivalent loops at a same deep in the loops dependencies tree
    self.deep_equiv = set()

    # For one loop : contains all address of the loop only
    self.loops_set = {}

    # For one loop : contains all address of the loop and sub-loops
    self.loops_all = {}

    # Loop dependencies
    self.deps = {}
    self.rev_deps = {}

    # Loops marked as "False"
    self.false_loops = set()

    if len(self.nodes) > MAX_NODES:
        self.skipped_loops_analysis = True
        return

    self.__explore(entry, set(), set(), {}, None, set())

    self.roots = self.loops_set.keys() - self.rev_deps.keys()

    self.__prune_loops()

    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost; confirm that the deep-equivalence search belongs
    # inside this condition.
    if not bypass_false_search:
        self.__search_false_loops()
        self.__search_same_deep_equiv_loops()

    self.__update_loops()

    # Compute all address which are not in a loop
    in_loop = set()
    for members in self.loops_set.values():
        in_loop.update(members)

    # Rest of all address which are not in a loop
    self.not_in_loop = self.nodes.keys() - in_loop

    # Search infinite loops
    self.infinite_loop = set()
    for loop_key, loop_addrs in self.loops_all.items():
        if self.__is_inf_loop(loop_addrs):
            self.infinite_loop.add(loop_key)

    # Save first address of loops
    self.loops_start = set()
    self.loops_start.update(first for _, first in self.loops_all)

    # For each loop we search the last node that if we enter in it,
    # we are sure to return to the loop.
    self.last_loop_node = {}
    for loop_key, loop_addrs in self.loops_all.items():
        parent_start, loop_start = loop_key
        self.last_loop_node[(parent_start, loop_start)] = set()
        self.__search_last_loop_node(set(), parent_start, loop_start,
                                     loop_addrs)

    elapsed = time() - t_begin
    debug__("Exploration: found %d loop(s) in %fs" %
            (len(self.loops_all), elapsed))
def load_symbols(self):
    """Load the static then the dynamic symbols and log how long it took."""
    t_begin = time()

    self.__binary.load_static_sym()
    self.__binary.load_dyn_sym()

    duration = time() - t_begin
    debug__("Found %d symbols in %fs" % (len(self.symbols), duration))
def __loop_detection(self, ctx, entry):
    """Detect the loops reachable from ``entry`` and fill the loop tables.

    After the exploration, false loops are removed from ``loops_all``, then
    ``infinite_loop``, ``loops_start`` and ``last_loop_node`` are populated.
    ``ctx`` is not used by this function.
    """
    # Named timer_start (not "start") so that the loop variables below
    # cannot overwrite it: the original reused "start" as a loop variable
    # and then subtracted a loop address from the end time.
    # time.clock() was also removed in Python 3.8.
    timer_start = time.perf_counter()

    waiting = {}
    self.__explore(entry, set(), set(), waiting, None, set())
    self.__search_equiv_loops()
    self.__search_false_loops()

    for k in self.false_loops:
        del self.loops_all[k]

    # Search infinite loops
    self.infinite_loop = set()
    for l_curr_loop, l_set in self.loops_all.items():
        if self.__is_inf_loop(l_set):
            self.infinite_loop.add(l_curr_loop)

    # Save first address of loops
    for _, l_start in self.loops_all:
        self.loops_start.add(l_start)

    # Search the last node which forces looping
    for (l_prev_loop, l_start), l_set in self.loops_all.items():
        self.last_loop_node[(l_prev_loop, l_start)] = set()
        self.__search_last_loop_node(set(), l_prev_loop, l_start, l_set)

    elapsed = time.perf_counter() - timer_start
    debug__("Exploration: found %d loop in %fs" %
            (len(self.loops_all), elapsed))
def decompile(self):
    """Build the graph and AST for ``self.entry`` and render them.

    Returns the output object, or None when the graph cannot be built or
    when an ExcIfelse is raised in interactive mode.  Outside interactive
    mode an ExcIfelse ends with a call to ``die()``.
    """
    self.is_dump = False

    graph, pe_nb_new_syms = self.gctx.dis.get_graph(self.entry)
    self.gph = graph

    if self.gph is None:
        error("capstone can't disassemble here")
        return None

    self.gph.simplify()

    database = self.gctx.db
    if database.loaded and pe_nb_new_syms:
        database.modified = True

    try:
        self.gph.loop_detection(self.entry)
        ast, correctly_ended = generate_ast(self)

        if not correctly_ended:
            # Retry with the false-loop search bypassed.
            debug__("Second try...")
            self.gph.loop_detection(self.entry, True)
            ast, _ = generate_ast(self)

        self.ast = ast
    except ExcIfelse as exc:
        error("can't have a ifelse here %x" % exc.addr)
        if self.gctx.interactive_mode:
            return None
        die()

    out = self.gctx.libarch.output.Output(self)
    out._ast(self.entry, ast)
    self.output = out
    return out
def pe_reverse_stripped_symbols(self, dis):
    """Delegate to the PE loader and log the number of symbols it reports.

    Returns that number so callers can tell whether new symbols were found
    (the original returned nothing; callers ignoring the result are
    unaffected).
    """
    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()

    n = self.__binary.pe_reverse_stripped_symbols(dis)

    elapsed = time.perf_counter() - start
    debug__("Found %d imported symbols (PE) in %fs" % (n, elapsed))
    return n
def __explore(self, entry):
    """Explore the graph from ``entry`` and return the collected ``Paths``."""
    paths = Paths()

    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()

    self.__rec_explore(paths, [], entry)

    elapsed = time.perf_counter() - start
    debug__("Exploration: found %d paths and %d loop-paths in %fs" %
            (len(paths.paths), len(paths.looping), elapsed))

    return paths
def load_extra(self):
    """Load static symbols, dynamic symbols and data sections, in that order."""
    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()

    self.__binary.load_static_sym()
    self.__binary.load_dyn_sym()
    self.__binary.load_data_sections()

    elapsed = time.perf_counter() - start
    debug__("Found %d symbols in %fs" % (len(self.symbols), elapsed))
def __compute_nested(self):
    """Compute which loops are nested in which.

    For every loop index ``k`` this fills ``direct_nested_idx[k]`` and
    ``nested_loops_idx[k]`` with loop indexes.  Index -1 receives, in
    ``direct_nested_idx``, the loops for which no parent was found and, in
    ``nested_loops_idx``, all loop indexes.
    """
    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()

    # One initialisation pass is enough (the original ran the same
    # initialisation twice in a row).
    for k in range(len(self.loops)):
        self.nested_loops_idx[k] = set()
        self.direct_nested_idx[k] = set()

    has_parent_loop_idx = set()

    for k1, l1 in enumerate(self.loops):
        if k1 in self.marked:
            continue
        for addr in l1[1:]:
            # check if addr is a beginning of another loop
            for k2, l2 in enumerate(self.loops):
                if k2 in self.marked or \
                        self.loops_set[k1] == self.loops_set[k2]:
                    continue
                if l2[0] == addr:
                    self.direct_nested_idx[k1].add(k2)
                    self.nested_loops_idx[k1].add(k2)
                    has_parent_loop_idx.add(k2)

    # Warning : sometimes a sub-nested-loop didn't appear in a
    # parent-parent-loop. So we search for new nested.
    # See tests/nestedloop5 :
    # the path of the third loop is not in the first one
    while True:
        moved = False
        for parent in self.nested_loops_idx:
            l_par = self.nested_loops_idx[parent]
            # Iterate over a copy: l_par grows inside the loop.
            for nest in list(l_par):
                for subnest in self.nested_loops_idx[nest]:
                    if subnest not in l_par:
                        l_par.add(subnest)
                        has_parent_loop_idx.add(subnest)
                        moved = True
        if not moved:
            break

    all_idx = set(range(len(self.loops)))
    self.direct_nested_idx[-1] = all_idx - has_parent_loop_idx
    self.nested_loops_idx[-1] = set(all_idx)

    elapsed = time.perf_counter() - start
    debug__("Nested loops computed in %fs" % elapsed)
def __compute_nested(self):
    """Compute which loops are nested in which.

    For every loop index ``k`` this fills ``direct_nested_idx[k]`` and
    ``nested_loops_idx[k]`` with loop indexes.  Index -1 receives, in
    ``direct_nested_idx``, the loops for which no parent was found and, in
    ``nested_loops_idx``, all loop indexes.
    """
    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()

    # One initialisation pass is enough (the original ran the same
    # initialisation twice in a row).
    for k in range(len(self.loops)):
        self.nested_loops_idx[k] = set()
        self.direct_nested_idx[k] = set()

    has_parent_loop_idx = set()

    for k1, l1 in enumerate(self.loops):
        if k1 in self.marked:
            continue
        for addr in l1[1:]:
            # check if addr is a beginning of another loop
            for k2, l2 in enumerate(self.loops):
                if k2 in self.marked or \
                        self.loops_set[k1] == self.loops_set[k2]:
                    continue
                if l2[0] == addr:
                    self.direct_nested_idx[k1].add(k2)
                    self.nested_loops_idx[k1].add(k2)
                    has_parent_loop_idx.add(k2)

    # Warning : sometimes a sub-nested-loop didn't appear in a
    # parent-parent-loop. So we search for new nested.
    # See tests/nestedloop5 :
    # the path of the third loop is not in the first one
    while True:
        moved = False
        for parent in self.nested_loops_idx:
            l_par = self.nested_loops_idx[parent]
            # Iterate over a copy: l_par grows inside the loop.
            for nest in list(l_par):
                for subnest in self.nested_loops_idx[nest]:
                    if subnest not in l_par:
                        l_par.add(subnest)
                        has_parent_loop_idx.add(subnest)
                        moved = True
        if not moved:
            break

    all_idx = set(range(len(self.loops)))
    self.direct_nested_idx[-1] = all_idx - has_parent_loop_idx
    self.nested_loops_idx[-1] = set(all_idx)

    elapsed = time.perf_counter() - start
    debug__("Nested loops computed in %fs" % elapsed)
def __loop_detection(self, ctx, entry):
    """Detect the loops reachable from ``entry`` and fill the loop tables.

    A first timing message is logged after the exploration and the
    equivalent-loop search, a second one at the very end.  ``ctx`` is not
    used by this function.
    """
    t_begin = time()

    self.__explore(entry, set(), set(), {}, None, set())
    self.__search_equiv_loops()

    duration = time() - t_begin
    debug__("Exploration: found %d loop(s) in %fs" %
            (len(self.loops_all), duration))

    self.html_graph([])

    self.__search_false_loops()

    for false_key in self.false_loops:
        del self.loops_all[false_key]
        del self.loops_set[false_key]

    # Compute all address which are not in a loop
    in_loop = set()
    for members in self.loops_set.values():
        in_loop.update(members)

    self.not_in_loop = self.nodes.keys() - in_loop

    # Search infinite loops
    self.infinite_loop = set()
    for loop_key, loop_addrs in self.loops_all.items():
        if self.__is_inf_loop(loop_addrs):
            self.infinite_loop.add(loop_key)

    # Save first address of loops
    self.loops_start.update(first for _, first in self.loops_all)

    # Search the last node which forces looping
    for loop_key, loop_addrs in self.loops_all.items():
        parent_start, loop_start = loop_key
        self.last_loop_node[(parent_start, loop_start)] = set()
        self.__search_last_loop_node(set(), parent_start, loop_start,
                                     loop_addrs)

    duration = time() - t_begin
    debug__("Exploration: found %d loop(s) in %fs" %
            (len(self.loops_all), duration))
def __simplify(self):
    """Fuse each node with its single predecessor when it is safe to do so.

    A node is merged into its predecessor when it is not the entry point,
    has exactly one predecessor, that predecessor has exactly one
    successor, and neither of them starts with a jump instruction.
    """
    ARCH_UTILS = self.dis.load_arch_module().utils

    # Snapshot of the keys: self.nodes is modified inside the loop.
    nodes = list(self.nodes.keys())

    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()

    for ad in nodes:
        inst = self.nodes[ad]
        if ARCH_UTILS.is_jump(inst[0]):
            continue

        if ad not in self.link_in or len(self.link_in[ad]) != 1 or \
                ad == self.entry_point_addr:
            continue

        pred = self.link_in[ad][0]

        # don't fuse with jumps
        if ARCH_UTILS.is_jump(self.nodes[pred][0]):
            continue

        if pred not in self.link_out or len(self.link_out[pred]) != 1:
            continue

        # The predecessor inherits the successors of the merged node.
        if ad in self.link_out:
            self.link_out[pred] = self.link_out[ad]
        else:
            del self.link_out[pred]

        self.nodes[pred] += self.nodes[ad]

        if ad in self.link_out:
            del self.link_out[ad]

        del self.link_in[ad]
        del self.nodes[ad]

        # replace all addresses which refer to ad
        for lst_i in self.link_in.values():
            try:
                lst_i[lst_i.index(ad)] = pred
            except ValueError:
                # ad is not a predecessor in this list.
                pass

    elapsed = time.perf_counter() - start
    debug__("Graph simplified in %fs" % elapsed)
def simplify(self):
    """Fuse each node with its single predecessor when it is safe to do so.

    A node is merged into its predecessor when it is not the entry point,
    has exactly one predecessor, that predecessor has exactly one
    successor, and neither address is recorded as a jump.
    """
    def is_jump(address):
        return (address in self.uncond_jumps_set or
                address in self.cond_jumps_set)

    # Snapshot of the keys: self.nodes is modified inside the loop.
    candidates = list(self.nodes.keys())
    t_begin = time()

    for ad in candidates:
        if is_jump(ad):
            continue

        if ad not in self.link_in:
            continue
        preds = self.link_in[ad]
        if len(preds) != 1 or ad == self.entry_point_addr:
            continue

        pred = preds[0]

        # don't fuse with jumps
        if is_jump(pred):
            continue

        if pred not in self.link_out:
            continue
        if len(self.link_out[pred]) != 1:
            continue

        # The predecessor inherits the successors of the merged node.
        if ad in self.link_out:
            self.link_out[pred] = self.link_out[ad]
        else:
            del self.link_out[pred]

        self.nodes[pred] += self.nodes[ad]

        if ad in self.link_out:
            del self.link_out[ad]

        del self.link_in[ad]
        del self.nodes[ad]

        # replace all addresses which refer to ad
        for incoming in self.link_in.values():
            if ad in incoming:
                incoming[incoming.index(ad)] = pred

    duration = time() - t_begin
    debug__("Graph simplified in %fs (%d nodes)" %
            (duration, len(self.nodes)))
def disasm(ctx):
    """Build the graph and AST for ``ctx.entry_addr`` and render them.

    Optionally emits a dot graph (``ctx.graph``) and vim syntax files
    (``ctx.vim``).  In vim mode ``sys.stdout`` is replaced by the opened
    ``<base>.rev`` file and is not restored here.

    Returns the output object, or None when the graph cannot be built or
    when an ExcIfelse is raised in interactive mode.
    """
    ctx.gph, pe_nb_new_syms = ctx.dis.get_graph(ctx.entry_addr)

    # Identity test against None, as done in the sibling decompile().
    if ctx.gph is None:
        error("capstone can't disassemble here")
        return None

    ctx.gph.simplify()

    if ctx.db.loaded and pe_nb_new_syms:
        ctx.db.modified = True

    try:
        ctx.gph.loop_detection(ctx, ctx.entry_addr)
        ast, correctly_ended = generate_ast(ctx)
        if not correctly_ended:
            # Retry with the false-loop search bypassed.
            debug__("Second try...")
            ctx.gph.loop_detection(ctx, ctx.entry_addr, True)
            ast, _ = generate_ast(ctx)
    except ExcIfelse as e:
        error("can't have a ifelse here %x" % e.addr)
        if ctx.interactive_mode:
            return None
        die()

    if ctx.graph:
        ctx.gph.dot_graph(ctx.dis.jmptables)

    if ctx.vim:
        base = os.path.basename(ctx.filename) + "_" + ctx.entry
        # re-assign if no colors
        ctx.libarch.process_ast.assign_colors(ctx, ast)
        ctx.color = False
        generate_vim_syntax(ctx, base + ".vim")
        # Everything printed from now on goes to the .rev file.
        sys.stdout = open(base + ".rev", "w+")

    o = ctx.libarch.output.Output(ctx)
    o._ast(ctx.entry_addr, ast)

    if ctx.vim:
        print("Run : vim {0}.rev -S {0}.vim".format(base), file=sys.stderr)

    return o
def __init__(self, filename, raw_type=None):
    """Open ``filename`` with the matching loader, then load its symbols.

    When ``raw_type`` is given the file is handed to the raw loader and the
    function returns immediately (no symbols are loaded).  Otherwise
    ``load_magic`` sets ``self.type``, the ELF or PE loader is
    instantiated, and static symbols, dynamic symbols and data sections
    are loaded.

    Raises ExcFileFormat when the detected type is neither ELF nor PE.
    """
    self.__binary = None
    self.reverse_symbols = {}
    self.symbols = {}
    self.type = None

    # Identity test: the idiomatic way to compare against None.
    if raw_type is not None:
        import lib.fileformat.raw as LIB_RAW
        self.__binary = LIB_RAW.Raw(filename, raw_type)
        self.type = T_BIN_RAW
        return

    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()
    self.load_magic(filename)

    if self.type == T_BIN_ELF:
        import lib.fileformat.elf as LIB_ELF
        self.__binary = LIB_ELF.ELF(self, filename)
    elif self.type == T_BIN_PE:
        import lib.fileformat.pe as LIB_PE
        self.__binary = LIB_PE.PE(self, filename)
    else:
        raise ExcFileFormat()

    elapsed = time.perf_counter() - start
    debug__("Binary loaded in %fs" % elapsed)

    start = time.perf_counter()

    self.__binary.load_static_sym()
    self.__binary.load_dyn_sym()
    self.__binary.load_data_sections()

    elapsed = time.perf_counter() - start
    debug__("Found %d symbols in %fs" % (len(self.symbols), elapsed))
def generate_ast(ctx__):
    """Build the AST of the function starting at ``ctx.entry``.

    The graph ``ctx.gph`` is walked with an explicit stack.  Each stack item
    is a tuple ``(ast, loops_stack, prev, curr, else_addr)``: the branch to
    append to, the stack of enclosing loops, the previous address, the
    address to process and the address of the current else-part (-1 if
    none).  The module-level ``ctx`` is set to ``ctx__``.

    Returns a tuple ``(ast, correctly_ended)``.  ``correctly_ended`` is
    False when some addresses are still in ``waiting`` at the end; in that
    case warning comments are inserted at the top of the tree.

    Raises ExcIfelse when both targets of a conditional jump are outside
    the current loop.
    """
    global ctx
    ctx = ctx__

    start = time()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    # Shared throw-away branch used for and-if and for forced restarts.
    fake_br = Ast_Branch()
    fake_br.level = sys.maxsize

    while stack or waiting:

        if not stack and waiting:
            # Nothing left to process but some nodes are still waiting:
            # give up unless the loop analysis was skipped, in which case
            # every waiting address is pushed back on the stack.
            if not ctx.gph.skipped_loops_analysis:
                break
            for ad in set(waiting):
                waiting[ad].unseen.clear()
                stack.append((fake_br, [], -1, ad, -1))

        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            if (l_start, curr) in ctx.gph.loops_all:
                # Give the loop head a symbol if it has none yet.
                if curr not in ctx.gctx.db.reverse_symbols:
                    name = "loop_0x%x" % curr
                    ctx.gctx.db.symbols[name] = curr
                    ctx.gctx.db.reverse_symbols[curr] = name
                    ctx.gctx.db.modified = True

                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.idx_in_parent = len(ast.nodes)
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True
        # Here curr may have changed

        if curr in visited:
            if curr == l_start:
                continue
            # NOTE(review): the indentation of this block was reconstructed
            # from a source whose whitespace was lost; confirm that the
            # `else` pairs with `if len(ast.nodes) > 0`.
            if len(ast.nodes) > 0:
                if isinstance(ast.nodes[-1], list):
                    prev = ast.nodes[-1][0].address
                    if prev not in ctx.gph.uncond_jumps_set:
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            if curr != ctx.entry and curr not in ctx.gctx.db.reverse_symbols:
                name = "ret_0x%x" % curr
                ctx.gctx.db.symbols[name] = curr
                ctx.gctx.db.reverse_symbols[curr] = name
                ctx.gctx.db.modified = True
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.gctx.dis.jmptables:
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                # c1/c2: does the corresponding target leave the loop?
                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    continue

            # and-if
            if ctx.gctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT]))

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP],
                                  prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP]))

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    # If we have already seen this address (for example the
                    # endpoint is the beginning of the current loop) we don't
                    # re-add in the waiting list.
                    if endpoint not in visited:
                        manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                        l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append(
                (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint, so it's not useful to have an else-branch
                # -> the stack will continue on `ast`
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(endpoint))

        else:
            # Single successor: append the block and continue with it.
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    fix_non_consecutives(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time()

    for func in ctx.gctx.libarch.registered:
        func(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.gctx.color:
        ctx.gctx.libarch.process_ast.assign_colors(ctx, ast)

    if waiting:
        # Some nodes were never released: warn at the top of the output.
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(
            0, Ast_Comment("WARNING: there is a bug, the output is incomplete !"))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        return ast, False

    return ast, True
def generate_ast(ctx__):
    """Build the AST of the function starting at ``ctx.entry_addr``.

    The graph ``ctx.gph`` is walked with an explicit stack.  Each stack item
    is a tuple ``(ast, loops_stack, prev, curr, else_addr)``: the branch to
    append to, the stack of enclosing loops, the previous address, the
    address to process and the address of the current else-part (-1 if
    none).  The module-level ``ctx`` is set to ``ctx__``.

    Returns the root of the AST.

    Raises ExcIfelse when both targets of a conditional jump are outside
    the current loop.
    """
    global ctx
    ctx = ctx__

    start = time()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry_addr, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    while stack:
        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry_addr

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        # Check if we have already an other equivalent loop in waiting.
        if (l_start, curr) in ctx.gph.equiv:
            eq = ctx.gph.equiv[(l_start, curr)]
            dont_enter = False
            for ad in waiting:
                for i in waiting[ad].loop_start:
                    if (i, ad) in eq:
                        dont_enter = True
                        break
                if dont_enter:
                    break
            if dont_enter:
                # Restart main loop
                continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry_addr
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            is_new_loop = True
            if (l_start, curr) not in ctx.gph.loops_all:
                is_new_loop = False
            else:
                # Check that it is not equivalent to the current loop
                if loops_stack:
                    l_ast, l_prev_loop, l_start = loops_stack[-1]
                    if (l_prev_loop, curr) in ctx.gph.equiv and \
                            (l_prev_loop, l_start) in ctx.gph.equiv[(l_prev_loop, curr)]:
                        is_new_loop = False

            if is_new_loop:
                ctx.labels[curr] = "loop_0x%x" % curr
                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.idx_in_parent = len(ast.nodes)
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True
        # Here curr may have changed

        if curr in visited:
            if curr == l_start:
                continue
            # NOTE(review): the indentation of this block was reconstructed
            # from a source whose whitespace was lost; confirm that the
            # `else` pairs with `if len(ast.nodes) > 0`.
            if len(ast.nodes) > 0:
                if isinstance(ast.nodes[-1], list):
                    prev = ast.nodes[-1][0].address
                    if prev not in ctx.gph.uncond_jumps_set:
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            ctx.labels[curr] = "ret_0x%x" % curr
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.dis.jmptables:
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                # c1/c2: does the corresponding target leave the loop?
                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    continue

            # and-if
            if ctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT]))

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP],
                                  prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP]))

                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                    l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append((ast_if, list(loops_stack), curr,
                          nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint, so it's not useful to have an else-branch
                # -> the stack will continue on `ast`
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if,
                               endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(endpoint))

        else:
            # Single successor: append the block and continue with it.
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    fix_non_consecutives(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time()

    for func in ctx.libarch.registered:
        func(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast
def get_graph(self, addr):
    """Build the graph of the instructions reachable from ``addr``.

    Instructions are obtained with ``self.lazy_disasm`` and inserted into a
    new ``Graph``.  Addresses that still have to be visited are kept in a
    LIFO work list.  On MIPS the result of ``__prefetch_inst`` is attached
    to jumps and returns, and the fall-through of a conditional jump is
    computed from the prefetched instruction.

    Returns the ``Graph``, or None when nothing can be disassembled at
    ``addr``.
    """
    from capstone import CS_OP_IMM, CS_ARCH_MIPS

    ARCH_UTILS = self.load_arch_module().utils

    curr = self.lazy_disasm(addr)
    # Identity test: the idiomatic way to compare against None.
    if curr is None:
        return None

    gph = Graph(self, addr)
    rest = []

    # time.clock() was removed in Python 3.8; use perf_counter() instead.
    start = time.perf_counter()

    prefetch = None

    # WARNING: this assume that on every architectures the jump
    # address is the last operand (operands[-1])

    while True:
        if not gph.exists(curr):
            if self.arch == CS_ARCH_MIPS:
                prefetch = self.__prefetch_inst(curr)

            if ARCH_UTILS.is_uncond_jump(curr) and len(curr.operands) > 0:
                if curr.operands[-1].type == CS_OP_IMM:
                    target = curr.operands[-1].value.imm
                    nxt = self.lazy_disasm(target)
                    gph.set_next(curr, nxt, prefetch)
                    rest.append(nxt.address)
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.add_node(curr, prefetch)
                gph.uncond_jumps_set.add(curr.address)

            elif ARCH_UTILS.is_cond_jump(curr) and len(curr.operands) > 0:
                if curr.operands[-1].type == CS_OP_IMM:
                    nxt_jump = self.lazy_disasm(curr.operands[-1].value.imm)

                    if self.arch == CS_ARCH_MIPS:
                        direct_nxt = self.lazy_disasm(
                            prefetch.address + prefetch.size)
                    else:
                        direct_nxt = self.lazy_disasm(
                            curr.address + curr.size)

                    gph.set_cond_next(curr, nxt_jump, direct_nxt, prefetch)
                    rest.append(nxt_jump.address)
                    rest.append(direct_nxt.address)
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.add_node(curr, prefetch)
                gph.cond_jumps_set.add(curr.address)

            elif ARCH_UTILS.is_ret(curr):
                gph.add_node(curr, prefetch)

            else:
                try:
                    nxt = self.lazy_disasm(curr.address + curr.size)
                    gph.set_next(curr, nxt)
                    rest.append(nxt.address)
                except Exception:
                    # Best effort: the following instruction could not be
                    # linked, so keep the current one as a plain node.
                    # (Narrowed from a bare except so that
                    # KeyboardInterrupt/SystemExit are not swallowed.)
                    gph.add_node(curr)

        try:
            curr = self.lazy_disasm(rest.pop())
        except IndexError:
            # Work list exhausted.
            break

    if self.binary.type == T_BIN_PE:
        self.binary.pe_reverse_stripped_symbols(self)

    elapsed = time.perf_counter() - start
    debug__("Graph built in %fs" % elapsed)

    return gph
def get_graph(self, entry_addr):
    """Walk the code reachable from ``entry_addr`` and build its graph.

    Every disassembled instruction becomes one node of a ``Graph``; the
    addresses still to be visited are kept in a LIFO work list.

    Returns a ``(graph, nb_new_syms)`` tuple, or ``(None, 0)`` when the
    graph ends up with no node.
    """
    from capstone import CS_OP_IMM, CS_ARCH_MIPS

    arch_utils = self.load_arch_module().utils

    gph = Graph(self, entry_addr)
    todo = [entry_addr]
    t_begin = time()
    on_mips = self.arch == CS_ARCH_MIPS

    # WARNING: this assumes that on every architecture the jump
    # address is the last operand (operands[-1])

    # Here each instruction is a node. Blocks will be created in the
    # function __simplify.

    while todo:
        ad = todo.pop()
        insn = self.lazy_disasm(ad)

        if insn is None:
            # Nothing could be disassembled here: drop every link that
            # points to this address.
            if ad in gph.link_in:
                sources = gph.link_in[ad]
                for src in sources:
                    gph.link_out[src].remove(ad)
                # Second pass: forget the sources left without successor.
                for src in sources:
                    if not gph.link_out[src]:
                        del gph.link_out[src]
                del gph.link_in[ad]
            continue

        if gph.exists(insn):
            continue

        # Return instruction: no successor.
        if arch_utils.is_ret(insn):
            prefetched = self.__prefetch_inst(insn) if on_mips else None
            gph.new_node(insn, prefetched, None)
            continue

        # Unconditional jump.
        if arch_utils.is_uncond_jump(insn):
            prefetched = self.__prefetch_inst(insn) if on_mips else None
            gph.uncond_jumps_set.add(ad)
            target = insn.operands[-1]

            if target.type == CS_OP_IMM:
                dest = target.value.imm
                todo.append(dest)
                gph.new_node(insn, prefetched, [dest])
            elif insn.address in self.jmptables:
                entries = self.jmptables[insn.address].table
                todo += entries
                gph.new_node(insn, prefetched, entries)
            else:
                # Can't interpret jmp ADDR|reg
                gph.new_node(insn, prefetched, None)
            continue

        # Conditional jump.
        if arch_utils.is_cond_jump(insn):
            prefetched = self.__prefetch_inst(insn) if on_mips else None
            gph.cond_jumps_set.add(ad)
            target = insn.operands[-1]

            if target.type != CS_OP_IMM:
                # Can't interpret jmp ADDR|reg
                gph.new_node(insn, prefetched, None)
                continue

            # On MIPS the fall-through address is taken after the
            # prefetched instruction rather than after the jump.
            last = prefetched if on_mips else insn
            fallthrough = last.address + last.size
            taken = target.value.imm
            todo.extend((fallthrough, taken))
            gph.new_node(insn, prefetched, [fallthrough, taken])
            continue

        # Any other instruction simply continues with the next one.
        following = insn.address + insn.size
        todo.append(following)
        gph.new_node(insn, None, [following])

    if not gph.nodes:
        return None, 0

    nb_new_syms = 0
    if self.binary.type == T_BIN_PE:
        nb_new_syms = self.binary.pe_reverse_stripped_symbols(self)

    elapsed = time() - t_begin
    debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

    return gph, nb_new_syms
def get_graph(self, entry_addr):
    """Build the instruction graph reachable from ``entry_addr``.

    Each disassembled instruction becomes a node of a ``Graph``.  The
    addresses of the visited instructions (and of the instructions
    prefetched on MIPS) are collected and handed to
    ``pe_reverse_stripped_symbols`` when the binary is a PE.

    Returns a ``(graph, nb_new_syms)`` tuple, or ``(None, 0)`` when the
    graph ends up with no node.
    """
    from capstone import CS_OP_IMM, CS_ARCH_MIPS

    arch_utils = self.load_arch_module().utils

    gph = Graph(self, entry_addr)
    worklist = [entry_addr]
    began = time()
    seen_addresses = set()
    mips = self.arch == CS_ARCH_MIPS

    def prefetch_of(instruction):
        # Only MIPS prefetches an instruction; its address is recorded
        # together with the visited ones.
        if not mips:
            return None
        fetched = self.__prefetch_inst(instruction)
        seen_addresses.add(fetched.address)
        return fetched

    # WARNING: this assumes that on every architecture the jump
    # address is the last operand (operands[-1])

    # Here each instruction is a node. Blocks will be created in the
    # function __simplify.

    while worklist:
        ad = worklist.pop()
        cur = self.lazy_disasm(ad)

        if cur is None:
            # Remove all previous instructions which have a link
            # to this instruction.
            if ad in gph.link_in:
                incoming = gph.link_in[ad]
                for origin in incoming:
                    gph.link_out[origin].remove(ad)
                # Entries are deleted only once every removal is done.
                for origin in incoming:
                    if not gph.link_out[origin]:
                        del gph.link_out[origin]
                del gph.link_in[ad]
            continue

        if gph.exists(cur):
            continue

        seen_addresses.add(ad)

        if arch_utils.is_ret(cur):
            fetched = prefetch_of(cur)
            successors = None

        elif arch_utils.is_uncond_jump(cur):
            fetched = prefetch_of(cur)
            gph.uncond_jumps_set.add(ad)
            operand = cur.operands[-1]

            if operand.type == CS_OP_IMM:
                destination = operand.value.imm
                worklist.append(destination)
                successors = [destination]
            elif cur.address in self.jmptables:
                successors = self.jmptables[cur.address].table
                worklist += successors
            else:
                # Can't interpret jmp ADDR|reg
                successors = None

        elif arch_utils.is_cond_jump(cur):
            fetched = prefetch_of(cur)
            gph.cond_jumps_set.add(ad)
            operand = cur.operands[-1]

            if operand.type == CS_OP_IMM:
                # On MIPS the fall-through address is taken after the
                # prefetched instruction rather than after the jump.
                if mips:
                    not_taken = fetched.address + fetched.size
                else:
                    not_taken = cur.address + cur.size
                taken = operand.value.imm
                worklist.append(not_taken)
                worklist.append(taken)
                successors = [not_taken, taken]
            else:
                # Can't interpret jmp ADDR|reg
                successors = None

        else:
            fetched = None
            following = cur.address + cur.size
            worklist.append(following)
            successors = [following]

        gph.new_node(cur, fetched, successors)

    if not gph.nodes:
        return None, 0

    if self.binary.type == T_BIN_PE:
        nb_new_syms = self.binary.pe_reverse_stripped_symbols(
            self, seen_addresses)
    else:
        nb_new_syms = 0

    elapsed = time() - began
    debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))

    return gph, nb_new_syms
def generate_ast(ctx__):
    """Build the AST of the function starting at ``ctx__.entry_addr``.

    The graph ``ctx.gph`` is walked with an explicit LIFO stack.  Each stack
    entry is ``(ast, loops_stack, prev, curr, else_addr)``: the branch being
    filled, the stack of enclosing loops, the previous and current node
    addresses and the address of the current "else" target.

    Side effects: stores ``ctx__`` in the module-level ``ctx``, fills
    ``ctx.labels`` for loop heads and return blocks, then runs every
    function in ``ctx.libarch.registered`` on the AST and, when
    ``ctx.color`` is set, assigns colors.

    Returns the root ``Ast_Branch``.

    Raises ``ExcIfelse`` when both successors of a conditional jump are
    outside the current loop.
    """
    global ctx
    ctx = ctx__

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry_addr, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    while stack:
        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry_addr

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        # Check if we already have another equivalent loop in waiting.
        if (l_start, curr) in ctx.gph.equiv:
            eq = ctx.gph.equiv[(l_start, curr)]
            dont_enter = False
            for ad in waiting:
                for i in waiting[ad].loop_start:
                    if (i, ad) in eq:
                        dont_enter = True
                        break
                if dont_enter:
                    break
            if dont_enter:
                # Restart main loop
                continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry_addr
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            is_new_loop = True
            if (l_start, curr) not in ctx.gph.loops_all:
                is_new_loop = False
            else:
                # Check that it is not equivalent to the current loop
                if loops_stack:
                    l_ast, l_prev_loop, l_start = loops_stack[-1]
                    if ((l_prev_loop, curr) in ctx.gph.equiv and
                            (l_prev_loop, l_start) in
                            ctx.gph.equiv[(l_prev_loop, curr)]):
                        is_new_loop = False

            if is_new_loop:
                ctx.labels[curr] = "loop_0x%x" % curr
                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True

        # Here curr may have changed

        if curr in visited:
            if curr == l_start:
                continue
            if len(ast.nodes) > 0:
                if not isinstance(ast.nodes[-1], list):
                    ast.add(Ast_Goto(curr))
                else:
                    # No goto is needed right after an unconditional jump.
                    prev_inst = ast.nodes[-1][0]
                    if not ctx.libarch.utils.is_uncond_jump(prev_inst):
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            ctx.labels[curr] = "ret_0x%x" % curr
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.dis.jmptables:
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    ast.add(a)
                    continue

            # and-if
            if ctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    ast.add(a)

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP],
                                  prefetch)
                    a.parent = ast
                    ast.add(a)

                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                    l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append((ast_if, list(loops_stack), curr,
                          nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            a.parent = ast
            a.level = level + 1
            ast.add(a)

        else:
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    add_goto_after_alone_andif(ast)
    add_goto_if_inst_not_consecutives(ctx, ast)

    elapsed = time.perf_counter() - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time.perf_counter()

    for func in ctx.libarch.registered:
        func(ctx, ast)

    elapsed = time.perf_counter() - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast