def generate_ast(ctx__): global ctx ctx = ctx__ start = time() ast = Ast_Branch() ast.parent = None stack = [(ast, [], -1, ctx.entry, -1)] visited = set() waiting = {} ast_head = ast fake_br = Ast_Branch() fake_br.level = sys.maxsize while stack or waiting: if not stack and waiting: if not ctx.gph.skipped_loops_analysis: break for ad in set(waiting): waiting[ad].unseen.clear() stack.append((fake_br, [], -1, ad, -1)) ast, loops_stack, prev, curr, else_addr = stack.pop(-1) # Check if we enter in a false loop (see gotoinloop*) if loops_stack: _, _, l_start = loops_stack[-1] else: l_start = ctx.entry if (l_start, curr) in ctx.gph.false_loops: continue blk = ctx.gph.nodes[curr] # Exit the current loop while loops_stack: l_ast, l_prev_loop, l_start = loops_stack[-1] l_set = ctx.gph.loops_all[(l_prev_loop, l_start)] if curr not in l_set: loops_stack.pop(-1) ast = l_ast.parent else: break if not loops_stack: l_prev_loop = -1 l_start = ctx.entry l_set = None level = ast.level if curr not in visited: # Check if we need to stop and wait on a node a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set, l_prev_loop, l_start, True) if a is None: continue ast = a remove_unnecessary_goto(ast, curr) # Check if we enter in a new loop if (l_start, curr) in ctx.gph.loops_all: if curr not in ctx.gctx.db.reverse_symbols: name = "loop_0x%x" % curr ctx.gctx.db.symbols[name] = curr ctx.gctx.db.reverse_symbols[curr] = name ctx.gctx.db.modified = True level += 1 a = Ast_Loop() a.level = level a.parent = ast a.idx_in_parent = len(ast.nodes) a.branch.parent = ast a.branch.level = level a.branch.idx_in_parent = len(ast.nodes) ast.add(a) ast = a.branch loops_stack.append((a, l_start, curr)) else_addr = -1 l_ast = a l_set = ctx.gph.loops_all[(l_start, curr)] l_prev_loop = l_start l_start = curr if (l_prev_loop, l_start) in ctx.gph.infinite_loop: a.is_infinite = True # Here curr may has changed if curr in visited: if curr == l_start: continue if len(ast.nodes) > 0: if isinstance(ast.nodes[-1], list): prev = ast.nodes[-1][0].address if prev not in ctx.gph.uncond_jumps_set: ast.add(Ast_Goto(curr)) else: ast.add(Ast_Goto(curr)) continue visited.add(curr) # Return instruction if curr not in ctx.gph.link_out: if curr != ctx.entry and curr not in ctx.gctx.db.reverse_symbols: name = "ret_0x%x" % curr ctx.gctx.db.symbols[name] = curr ctx.gctx.db.reverse_symbols[curr] = name ctx.gctx.db.modified = True ast.add(blk) continue nxt = ctx.gph.link_out[curr] if curr in ctx.gctx.dis.jmptables: ast.add(blk) for n in nxt: stack.append((ast, loops_stack, curr, n, else_addr)) elif len(nxt) == 2: # We are on a conditional jump prefetch = blk[1] if len(blk) == 2 else None if loops_stack: goto_set = False c1 = nxt[BRANCH_NEXT] not in l_set c2 = nxt[BRANCH_NEXT_JUMP] not in l_set if c1 and c2: raise ExcIfelse(curr) if c1: exit_loop = nxt[BRANCH_NEXT] nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP] cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0]) goto_set = True if c2: exit_loop = nxt[BRANCH_NEXT_JUMP] nxt_node_in_loop = nxt[BRANCH_NEXT] cond_id = ctx.gctx.libarch.utils.get_cond(blk[0]) goto_set = True # goto to exit a loop if goto_set: stack.append((ast.parent, list(loops_stack), curr, exit_loop, else_addr)) stack.append((ast, list(loops_stack), curr, nxt_node_in_loop, else_addr)) a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch) a.parent = ast a.level = level a.idx_in_parent = len(ast.nodes) ast.add(a) continue # and-if if ctx.gctx.print_andif: if else_addr == nxt[BRANCH_NEXT_JUMP]: cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0]) a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch) a.parent = ast a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(nxt[BRANCH_NEXT])) # Add a fake branch, with this in the manage function # all gotos to the else_addr will be invisible. stack.append((fake_br, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr)) continue # and-if if else_addr == nxt[BRANCH_NEXT]: cond_id = ctx.gctx.libarch.utils.get_cond(blk[0]) a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP], prefetch) a.parent = ast a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP])) stack.append((fake_br, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr)) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) continue # if-else endpoint = search_endpoint(ctx, stack, ast, curr, l_set, l_prev_loop, l_start) ast_if = Ast_Branch() ast_if.parent = ast ast_if.level = level + 1 ast_if.idx_in_parent = len(ast.nodes) ast_else = Ast_Branch() ast_else.parent = ast ast_else.level = level + 1 ast_else.idx_in_parent = len(ast.nodes) else_addr = nxt[BRANCH_NEXT_JUMP] if endpoint != -1: if (l_start, endpoint) not in ctx.gph.false_loops: # If we have already seen this address (for example the # endpoint is the beginning of the current loop) we don't # re-add in the waiting list. if endpoint not in visited: manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set, l_prev_loop, l_start, False) else: endpoint = -1 stack.append( (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr)) if endpoint == -1: # No endpoint, so it's not useful to have an else-branch # -> the stack will continue on `ast` a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) a.parent = ast a.level = level + 1 a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(else_addr)) elif endpoint == else_addr: # Branch ast_else will be empty a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) a.parent = ast a.level = level + 1 a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(else_addr)) else: a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch) stack.append((ast_else, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) a.parent = ast a.level = level + 1 a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(endpoint)) else: ast.add(blk) stack.append((ast, loops_stack, curr, nxt[BRANCH_NEXT], else_addr)) ast = ast_head remove_all_unnecessary_goto(ast) fix_non_consecutives(ctx, ast) elapsed = time() elapsed = elapsed - start debug__("Ast generated in %fs" % elapsed) # Process ast start = time() for func in ctx.gctx.libarch.registered: func(ctx, ast) elapsed = time() elapsed = elapsed - start debug__("Functions for processing ast in %fs" % elapsed) if ctx.gctx.color: ctx.gctx.libarch.process_ast.assign_colors(ctx, ast) if waiting: ast_head.nodes.insert(0, Ast_Comment("")) ast_head.nodes.insert(0, Ast_Comment("")) ast_head.nodes.insert( 0, Ast_Comment("WARNING: there is a bug, the output is incomplete !")) ast_head.nodes.insert(0, Ast_Comment("")) ast_head.nodes.insert(0, Ast_Comment("")) return ast, False return ast, True
def generate_ast(ctx__): global ctx ctx = ctx__ start = time() ast = Ast_Branch() ast.parent = None stack = [(ast, [], -1, ctx.entry_addr, -1)] visited = set() waiting = {} ast_head = ast while stack: ast, loops_stack, prev, curr, else_addr = stack.pop(-1) # Check if we enter in a false loop (see gotoinloop*) if loops_stack: _, _, l_start = loops_stack[-1] else: l_start = ctx.entry_addr if (l_start, curr) in ctx.gph.false_loops: continue # Check if we have already an other equivalent loop in waiting. if (l_start, curr) in ctx.gph.equiv: eq = ctx.gph.equiv[(l_start, curr)] dont_enter = False for ad in waiting: for i in waiting[ad].loop_start: if (i, ad) in eq: dont_enter = True break if dont_enter: break if dont_enter: # Restart main loop continue blk = ctx.gph.nodes[curr] # Exit the current loop while loops_stack: l_ast, l_prev_loop, l_start = loops_stack[-1] l_set = ctx.gph.loops_all[(l_prev_loop, l_start)] if curr not in l_set: loops_stack.pop(-1) ast = l_ast.parent else: break if not loops_stack: l_prev_loop = -1 l_start = ctx.entry_addr l_set = None level = ast.level if curr not in visited: # Check if we need to stop and wait on a node a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set, l_prev_loop, l_start, True) if a is None: continue ast = a remove_unnecessary_goto(ast, curr) # Check if we enter in a new loop is_new_loop = True if (l_start, curr) not in ctx.gph.loops_all: is_new_loop = False else: # Check if if it's not equivalent as the current loop if loops_stack: l_ast, l_prev_loop, l_start = loops_stack[-1] if (l_prev_loop, curr) in ctx.gph.equiv and \ (l_prev_loop, l_start) in ctx.gph.equiv[(l_prev_loop, curr)]: is_new_loop = False if is_new_loop: ctx.labels[curr] = "loop_0x%x" % curr level += 1 a = Ast_Loop() a.level = level a.parent = ast a.idx_in_parent = len(ast.nodes) a.branch.parent = ast a.branch.level = level a.branch.idx_in_parent = len(ast.nodes) ast.add(a) ast = a.branch loops_stack.append((a, l_start, curr)) else_addr = -1 l_ast = a l_set = ctx.gph.loops_all[(l_start, curr)] l_prev_loop = l_start l_start = curr if (l_prev_loop, l_start) in ctx.gph.infinite_loop: a.is_infinite = True # Here curr may has changed if curr in visited: if curr == l_start: continue if len(ast.nodes) > 0: if isinstance(ast.nodes[-1], list): prev = ast.nodes[-1][0].address if prev not in ctx.gph.uncond_jumps_set: ast.add(Ast_Goto(curr)) else: ast.add(Ast_Goto(curr)) continue visited.add(curr) # Return instruction if curr not in ctx.gph.link_out: ctx.labels[curr] = "ret_0x%x" % curr ast.add(blk) continue nxt = ctx.gph.link_out[curr] if curr in ctx.dis.jmptables: ast.add(blk) for n in nxt: stack.append((ast, loops_stack, curr, n, else_addr)) elif len(nxt) == 2: # We are on a conditional jump prefetch = blk[1] if len(blk) == 2 else None if loops_stack: goto_set = False c1 = nxt[BRANCH_NEXT] not in l_set c2 = nxt[BRANCH_NEXT_JUMP] not in l_set if c1 and c2: raise ExcIfelse(curr) if c1: exit_loop = nxt[BRANCH_NEXT] nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP] cond_id = ctx.libarch.utils.invert_cond(blk[0]) goto_set = True if c2: exit_loop = nxt[BRANCH_NEXT_JUMP] nxt_node_in_loop = nxt[BRANCH_NEXT] cond_id = ctx.libarch.utils.get_cond(blk[0]) goto_set = True # goto to exit a loop if goto_set: stack.append((ast.parent, list(loops_stack), curr, exit_loop, else_addr)) stack.append((ast, list(loops_stack), curr, nxt_node_in_loop, else_addr)) a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch) a.parent = ast a.level = level a.idx_in_parent = len(ast.nodes) ast.add(a) continue # and-if if ctx.print_andif: if else_addr == nxt[BRANCH_NEXT_JUMP]: cond_id = ctx.libarch.utils.invert_cond(blk[0]) a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch) a.parent = ast a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(nxt[BRANCH_NEXT])) # Add a fake branch, with this in the manage function # all gotos to the else_addr will be invisible. fake_br = Ast_Branch() fake_br.level = sys.maxsize stack.append((fake_br, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr)) continue # and-if if else_addr == nxt[BRANCH_NEXT]: cond_id = ctx.libarch.utils.get_cond(blk[0]) a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP], prefetch) a.parent = ast a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP])) fake_br = Ast_Branch() fake_br.level = sys.maxsize stack.append((fake_br, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr)) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) continue # if-else endpoint = search_endpoint(ctx, stack, ast, curr, l_set, l_prev_loop, l_start) ast_if = Ast_Branch() ast_if.parent = ast ast_if.level = level + 1 ast_if.idx_in_parent = len(ast.nodes) ast_else = Ast_Branch() ast_else.parent = ast ast_else.level = level + 1 ast_else.idx_in_parent = len(ast.nodes) else_addr = nxt[BRANCH_NEXT_JUMP] if endpoint != -1: if (l_start, endpoint) not in ctx.gph.false_loops: manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set, l_prev_loop, l_start, False) else: endpoint = -1 stack.append((ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr)) if endpoint == -1: # No endpoint, so it's not useful to have an else-branch # -> the stack will continue on `ast` a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) a.parent = ast a.level = level + 1 a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(else_addr)) elif endpoint == else_addr: # Branch ast_else will be empty a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) a.parent = ast a.level = level + 1 a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(else_addr)) else: a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch) stack.append((ast_else, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) a.parent = ast a.level = level + 1 a.idx_in_parent = len(ast.nodes) ast.add(a) ast.add(Ast_Goto(endpoint)) else: ast.add(blk) stack.append((ast, loops_stack, curr, nxt[BRANCH_NEXT], else_addr)) ast = ast_head remove_all_unnecessary_goto(ast) fix_non_consecutives(ctx, ast) elapsed = time() elapsed = elapsed - start debug__("Ast generated in %fs" % elapsed) # Process ast start = time() for func in ctx.libarch.registered: func(ctx, ast) elapsed = time() elapsed = elapsed - start debug__("Functions for processing ast in %fs" % elapsed) if ctx.color: ctx.libarch.process_ast.assign_colors(ctx, ast) return ast
def generate_ast(ctx__): global ctx ctx = ctx__ start = time.clock() ast = Ast_Branch() ast.parent = None stack = [(ast, [], -1, ctx.entry_addr, -1)] visited = set() waiting = {} ast_head = ast while stack: ast, loops_stack, prev, curr, else_addr = stack.pop(-1) # Check if we enter in a false loop (see gotoinloop*) if loops_stack: _, _, l_start = loops_stack[-1] else: l_start = ctx.entry_addr if (l_start, curr) in ctx.gph.false_loops: continue # Check if we have already an other equivalent loop in waiting. if (l_start, curr) in ctx.gph.equiv: eq = ctx.gph.equiv[(l_start, curr)] dont_enter = False for ad in waiting: for i in waiting[ad].loop_start: if (i, ad) in eq: dont_enter = True break if dont_enter: break if dont_enter: # Restart main loop continue blk = ctx.gph.nodes[curr] # Exit the current loop while loops_stack: l_ast, l_prev_loop, l_start = loops_stack[-1] l_set = ctx.gph.loops_all[(l_prev_loop, l_start)] if curr not in l_set: loops_stack.pop(-1) ast = l_ast.parent else: break if not loops_stack: l_prev_loop = -1 l_start = ctx.entry_addr l_set = None level = ast.level if curr not in visited: # Check if we need to stop and wait on a node a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set, l_prev_loop, l_start, True) if a is None: continue ast = a remove_unnecessary_goto(ast, curr) # Check if we enter in a new loop is_new_loop = True if (l_start, curr) not in ctx.gph.loops_all: is_new_loop = False else: # Check if if it's not equivalent as the current loop if loops_stack: l_ast, l_prev_loop, l_start = loops_stack[-1] if (l_prev_loop, curr) in ctx.gph.equiv and \ (l_prev_loop, l_start) in ctx.gph.equiv[(l_prev_loop, curr)]: is_new_loop = False if is_new_loop: ctx.labels[curr] = "loop_0x%x" % curr level += 1 a = Ast_Loop() a.level = level a.parent = ast a.branch.parent = ast a.branch.level = level a.branch.idx_in_parent = len(ast.nodes) ast.add(a) ast = a.branch loops_stack.append((a, l_start, curr)) else_addr = -1 l_ast = a l_set = ctx.gph.loops_all[(l_start, curr)] l_prev_loop = l_start l_start = curr if (l_prev_loop, l_start) in ctx.gph.infinite_loop: a.is_infinite = True # Here curr may has changed if curr in visited: if curr == l_start: continue if len(ast.nodes) > 0: if not isinstance(ast.nodes[-1], list): ast.add(Ast_Goto(curr)) else: prev_inst = ast.nodes[-1][0] if not ctx.libarch.utils.is_uncond_jump(prev_inst): ast.add(Ast_Goto(curr)) else: ast.add(Ast_Goto(curr)) continue visited.add(curr) # Return instruction if curr not in ctx.gph.link_out: ctx.labels[curr] = "ret_0x%x" % curr ast.add(blk) continue nxt = ctx.gph.link_out[curr] if curr in ctx.dis.jmptables: ast.add(blk) for n in nxt: stack.append((ast, loops_stack, curr, n, else_addr)) elif len(nxt) == 2: # We are on a conditional jump prefetch = blk[1] if len(blk) == 2 else None if loops_stack: goto_set = False c1 = nxt[BRANCH_NEXT] not in l_set c2 = nxt[BRANCH_NEXT_JUMP] not in l_set if c1 and c2: raise ExcIfelse(curr) if c1: exit_loop = nxt[BRANCH_NEXT] nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP] cond_id = ctx.libarch.utils.invert_cond(blk[0]) goto_set = True if c2: exit_loop = nxt[BRANCH_NEXT_JUMP] nxt_node_in_loop = nxt[BRANCH_NEXT] cond_id = ctx.libarch.utils.get_cond(blk[0]) goto_set = True # goto to exit a loop if goto_set: stack.append((ast.parent, list(loops_stack), curr, exit_loop, else_addr)) stack.append((ast, list(loops_stack), curr, nxt_node_in_loop, else_addr)) a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch) a.parent = ast a.level = level ast.add(a) continue # and-if if ctx.print_andif: if else_addr == nxt[BRANCH_NEXT_JUMP]: cond_id = ctx.libarch.utils.invert_cond(blk[0]) a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch) a.parent = ast ast.add(a) # Add a fake branch, with this in the manage function # all gotos to the else_addr will be invisible. fake_br = Ast_Branch() fake_br.level = sys.maxsize stack.append((fake_br, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr)) continue # and-if if else_addr == nxt[BRANCH_NEXT]: cond_id = ctx.libarch.utils.get_cond(blk[0]) a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP], prefetch) a.parent = ast ast.add(a) fake_br = Ast_Branch() fake_br.level = sys.maxsize stack.append((fake_br, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr)) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) continue # if-else endpoint = search_endpoint(ctx, stack, ast, curr, l_set, l_prev_loop, l_start) ast_if = Ast_Branch() ast_if.parent = ast ast_if.level = level + 1 ast_if.idx_in_parent = len(ast.nodes) ast_else = Ast_Branch() ast_else.parent = ast ast_else.level = level + 1 ast_else.idx_in_parent = len(ast.nodes) else_addr = nxt[BRANCH_NEXT_JUMP] if endpoint != -1: if (l_start, endpoint) not in ctx.gph.false_loops: manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set, l_prev_loop, l_start, False) else: endpoint = -1 stack.append( (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr)) if endpoint == -1: a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) elif endpoint == else_addr: # Branch ast_else will be empty a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch) stack.append((ast, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) else: a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch) stack.append((ast_else, list(loops_stack), curr, nxt[BRANCH_NEXT_JUMP], else_addr)) a.parent = ast a.level = level + 1 ast.add(a) else: ast.add(blk) stack.append((ast, loops_stack, curr, nxt[BRANCH_NEXT], else_addr)) ast = ast_head remove_all_unnecessary_goto(ast) add_goto_after_alone_andif(ast) add_goto_if_inst_not_consecutives(ctx, ast) elapsed = time.clock() elapsed = elapsed - start debug__("Ast generated in %fs" % elapsed) # Process ast start = time.clock() for func in ctx.libarch.registered: func(ctx, ast) elapsed = time.clock() elapsed = elapsed - start debug__("Functions for processing ast in %fs" % elapsed) if ctx.color: ctx.libarch.process_ast.assign_colors(ctx, ast) return ast