def add_node(i, last_cond, br_lst):
    """Queue the collected block list ``br_lst`` in ``added_nodes[i]``.

    Nothing is queued when ``br_lst`` is empty.  When ``last_cond`` is
    ``ARM_CC_AL`` the list is queued as is; for any other condition it is
    first wrapped in an ``Ast_If_cond`` guarding a fresh ``Ast_Branch``.
    """
    if not br_lst:
        return

    if last_cond != ARM_CC_AL:
        # Conditional run: wrap the blocks into an if-node.
        guarded = Ast_Branch()
        guarded.add(br_lst)
        added_nodes[i].append(Ast_If_cond(last_cond, guarded))
        return

    added_nodes[i].append(br_lst)
def get_ast_loop(paths, last_loop, last_else, endif):
    """Build the ``Ast_Loop`` for the loop at the head of ``paths``.

    Returns a tuple ``(loop_ast, endpoint)``.  ``endpoint`` is ``-1`` when
    ``extract_loop_paths`` reports no end-of-loop paths, otherwise it is the
    first address of the last end-of-loop paths object.  ``endif`` is
    accepted but not used here.
    """
    loop_ast = Ast_Loop()
    curr_loop_idx = paths.get_loops_idx()

    head_blk = gph.nodes[get_loop_start(curr_loop_idx)]
    if not is_cond_jump(head_blk[0]):
        loop_ast.add(head_blk)
    else:
        loop_ast.add(get_ast_ifgoto(paths, curr_loop_idx, head_blk[0]))

    loop_paths, endloop = paths.extract_loop_paths(curr_loop_idx)

    # An empty `endloop` is not enough to decide that the loop is
    # infinite (see tests/nestedloop2), so the loop paths are inspected.
    loop_ast.set_infinite(paths_is_infinite(loop_paths))

    paths.pop()
    loop_ast.add(get_ast_branch(loop_paths, curr_loop_idx, last_else))

    if not endloop:
        return loop_ast, -1

    epilog = Ast_Branch()

    if len(endloop) > 1:
        # Every end-of-loop path except the last one is printed in the
        # epilog, each preceded by a numbered comment.
        for num, el in enumerate(endloop[:-1], 1):
            epilog.add(Ast_Comment("endloop " + str(num)))
            epilog.add(get_ast_branch(el, last_loop, last_else))

        # The last path is continued by the caller; only its marker is
        # appended here.
        epilog.add(Ast_Comment("endloop " + str(num + 1)))
        loop_ast.set_epilog(epilog)

    return loop_ast, endloop[-1].first()
def get_ast_loop(ctx, paths, last_loop_idx, last_else, endif):
    """Build the ``Ast_Loop`` for the loop at the head of ``paths``.

    Returns a tuple ``(loop_ast, endpoint)``.  ``endpoint`` is ``-1`` when
    ``extract_loop_paths`` reports no loop ends, otherwise it is the first
    address of the last loop-end paths object.
    """
    loop_ast = Ast_Loop()
    curr_loop_idx = paths.get_loops_idx()

    head_blk = ctx.gph.nodes[paths.get_loop_start(curr_loop_idx)]

    # When the loop starts with a conditional jump, that jump is the loop
    # condition: print it as an if-goto instead of a plain block.
    if first_is_cond := head_blk[0].address in ctx.gph.cond_jumps_set:
        head_node = get_ast_ifgoto(ctx, paths, curr_loop_idx, head_blk[0])
    if not first_is_cond:
        head_node = head_blk
    loop_ast.add(head_node)

    extracted = paths.extract_loop_paths(curr_loop_idx, last_loop_idx, endif)
    loop_paths, loopends, loopends_start = extracted

    # An empty `loopends` is not enough to decide that the loop is
    # infinite (see tests/nestedloop2), so the loop paths are inspected.
    loop_ast.set_infinite(paths_is_infinite(loop_paths))

    # The loop head was already emitted above: consume it and mark it seen.
    head_addr = loop_paths.pop(1)[0]
    ctx.seen.add(head_addr)
    loop_ast.add(get_ast_branch(ctx, loop_paths, curr_loop_idx, last_else))

    if not loopends:
        return loop_ast, -1

    epilog = Ast_Branch()

    if len(loopends) > 1:
        epilog_num = 1

        for el in loopends[:-1]:
            if isinstance(el, Ast_Goto):
                # Ready-made gotos are copied verbatim.
                epilog.add(el)
            else:
                if el.first() in loopends_start:
                    epilog.add(Ast_Comment("loopend " + str(epilog_num)))
                    epilog_num += 1
                epilog.add(
                    get_ast_branch(ctx, el, last_loop_idx, last_else))

        # The last loop end is continued by the caller; only its marker
        # is appended here.
        if loopends[-1].first() in loopends_start:
            epilog.add(Ast_Comment("loopend " + str(epilog_num)))

        loop_ast.set_epilog(epilog)

    return loop_ast, loopends[-1].first()
def get_ast_loop(ctx, paths, last_loop_idx, last_else):
    """Build the ``Ast_Loop`` for the loop at the head of ``paths``.

    Returns a tuple ``(loop_ast, endpoint)``.  ``endpoint`` is ``-1`` when
    ``extract_loop_paths`` reports no loop ends, otherwise it is the first
    address of the last loop-end paths object.  ``last_else`` is accepted
    but not used by this revision.
    """
    loop_ast = Ast_Loop()
    curr_loop_idx = paths.get_loops_idx()

    head_blk = ctx.gph.nodes[paths.get_loop_start(curr_loop_idx)]

    # When the loop starts with a conditional jump, that jump is the loop
    # condition: print it as an if-goto instead of a plain block.
    if head_blk[0].address not in ctx.gph.cond_jumps_set:
        loop_ast.add(head_blk)
    else:
        loop_ast.add(get_ast_ifgoto(ctx, paths, curr_loop_idx, head_blk[0]))

    loop_paths, loopends, loopends_start = paths.extract_loop_paths(
        curr_loop_idx, last_loop_idx)

    # An empty `loopends` is not enough to decide that the loop is
    # infinite (see tests/nestedloop2), so the loop paths are inspected.
    loop_ast.set_infinite(paths_is_infinite(loop_paths))

    # The loop head was already emitted above: consume it and mark it seen.
    head_addr = loop_paths.pop(1)[0]
    ctx.seen.add(head_addr)
    loop_ast.add(get_ast_branch(ctx, loop_paths, curr_loop_idx))

    if not loopends:
        return loop_ast, -1

    epilog = Ast_Branch()

    if len(loopends) > 1:
        epilog_num = 1

        for el in loopends[:-1]:
            if isinstance(el, Ast_Goto):
                # Ready-made gotos are copied verbatim.
                epilog.add(el)
            else:
                if el.first() in loopends_start:
                    epilog.add(Ast_Comment("loopend " + str(epilog_num)))
                    epilog_num += 1
                epilog.add(get_ast_branch(ctx, el, last_loop_idx))

        # The last loop end is continued by the caller; only its marker
        # is appended here.
        if loopends[-1].first() in loopends_start:
            epilog.add(Ast_Comment("loopend " + str(epilog_num)))

        loop_ast.set_epilog(epilog)

    return loop_ast, loopends[-1].first()
def get_ast_loop(ctx, paths, last_loop_idx, last_else, endif):
    """Build the ``Ast_Loop`` for the loop at the head of ``paths``.

    Returns a tuple ``(loop_ast, endpoint)``.  ``endpoint`` is ``-1`` when
    ``extract_loop_paths`` reports no end-of-loop paths, otherwise it is the
    first address of the last end-of-loop paths object.
    """
    loop_ast = Ast_Loop()
    curr_loop_idx = paths.get_loops_idx()

    head_blk = ctx.gph.nodes[paths.get_loop_start(curr_loop_idx)]
    if head_blk[0].address not in ctx.gph.cond_jumps_set:
        loop_ast.add(head_blk)
    else:
        loop_ast.add(get_ast_ifgoto(ctx, paths, curr_loop_idx, head_blk[0]))

    loop_paths, endloops, endloops_start = paths.extract_loop_paths(
        curr_loop_idx, last_loop_idx, endif)

    # An empty `endloops` is not enough to decide that the loop is
    # infinite (see tests/nestedloop2), so the loop paths are inspected.
    loop_ast.set_infinite(paths_is_infinite(loop_paths))

    # The loop head was already emitted above: consume it and mark it seen.
    head_addr = loop_paths.pop(1)[0]
    ctx.seen.add(head_addr)
    loop_ast.add(get_ast_branch(ctx, loop_paths, curr_loop_idx, last_else))

    if not endloops:
        return loop_ast, -1

    epilog = Ast_Branch()

    if len(endloops) > 1:
        counter = 1

        for el in endloops[:-1]:
            # A numbered marker is only printed when the path starts a
            # new end-of-loop section.
            starts_section = el.first() in endloops_start
            if starts_section:
                epilog.add(Ast_Comment("endloop " + str(counter)))
                counter += 1
            epilog.add(get_ast_branch(ctx, el, last_loop_idx, last_else))

        # The last path is continued by the caller; only its marker is
        # appended here.
        epilog.add(Ast_Comment("endloop " + str(counter)))
        loop_ast.set_epilog(epilog)

    return loop_ast, endloops[-1].first()
def generate_ast(ctx__):
    """Build the AST of the code reachable from ``ctx__.entry_addr``.

    The control-flow graph ``ctx.gph`` is walked iteratively with an explicit
    stack (last pushed, first processed).  Every stack entry is a tuple
    ``(ast, loops_stack, prev, curr, else_addr)``:

    * ``ast``         -- branch that receives the nodes produced for ``curr``
    * ``loops_stack`` -- enclosing loops, innermost last, each stored as
                         ``(loop_ast, previous_loop_start, loop_start)``
    * ``prev``        -- address we are coming from (-1 for the entry)
    * ``curr``        -- address of the block to process
    * ``else_addr``   -- jump target of the most recent if/else, -1 if none

    Side effects: ``ctx__`` is stored in the module-level global ``ctx`` and
    loop / return labels are written to ``ctx.labels``.

    Once the walk is finished the gotos are cleaned up, every function in
    ``ctx.libarch.registered`` is run on the tree and, when ``ctx.color`` is
    set, colors are assigned.

    Returns the root ``Ast_Branch``.

    Raises ``ExcIfelse`` when both successors of a conditional jump lie
    outside the current loop.
    """
    global ctx
    ctx = ctx__

    start = time()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry_addr, -1)]
    visited = set()
    # Filled and consulted by manage_endpoint; keyed by address here.
    waiting = {}

    # Keep the root: `ast` is rebound all along the walk.
    ast_head = ast

    while stack:
        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry_addr

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        # Check if we have already an other equivalent loop in waiting.
        if (l_start, curr) in ctx.gph.equiv:
            eq = ctx.gph.equiv[(l_start, curr)]
            dont_enter = False
            for ad in waiting:
                for i in waiting[ad].loop_start:
                    if (i, ad) in eq:
                        dont_enter = True
                        break
                if dont_enter:
                    break
            if dont_enter:
                # Restart main loop
                continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop: pop every loop that does not contain
        # `curr` and continue in the parent of the popped loop.
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        # Outside of any loop: use the function-level defaults.
        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry_addr
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            is_new_loop = True
            if (l_start, curr) not in ctx.gph.loops_all:
                is_new_loop = False
            else:
                # Check that it is not equivalent to the current loop
                if loops_stack:
                    l_ast, l_prev_loop, l_start = loops_stack[-1]
                    if (l_prev_loop, curr) in ctx.gph.equiv and \
                            (l_prev_loop, l_start) in ctx.gph.equiv[(l_prev_loop, curr)]:
                        is_new_loop = False

            if is_new_loop:
                ctx.labels[curr] = "loop_0x%x" % curr
                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.idx_in_parent = len(ast.nodes)
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                # From now on nodes go into the body of the new loop.
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True

        # At this point `ast`, `level` and the l_* loop variables may have
        # been updated by the code above.
        if curr in visited:
            # Jumping back to the start of the current loop needs no goto.
            if curr == l_start:
                continue

            if len(ast.nodes) > 0:
                if isinstance(ast.nodes[-1], list):
                    prev = ast.nodes[-1][0].address
                    if prev not in ctx.gph.uncond_jumps_set:
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))

            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            ctx.labels[curr] = "ret_0x%x" % curr
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.dis.jmptables:
            # Jump table: every target is explored in the same branch.
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    # The in-loop successor is pushed last, hence it is
                    # processed first.
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    continue

            # and-if
            if ctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT]))

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP]))

                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                    l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append((ast_if, list(loops_stack), curr,
                          nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint, so it's not useful to have an else-branch
                # -> the stack will continue on `ast`
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(endpoint))

        else:
            # Single successor: append the block and keep going.
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    fix_non_consecutives(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time()

    for func in ctx.libarch.registered:
        func(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast
def generate_ast(ctx__):
    """Build the AST of the code reachable from ``ctx__.entry``.

    The control-flow graph ``ctx.gph`` is walked iteratively with an explicit
    stack (last pushed, first processed).  Every stack entry is a tuple
    ``(ast, loops_stack, prev, curr, else_addr)``:

    * ``ast``         -- branch that receives the nodes produced for ``curr``
    * ``loops_stack`` -- enclosing loops, innermost last, each stored as
                         ``(loop_ast, previous_loop_start, loop_start)``
    * ``prev``        -- address we are coming from (-1 for the entry)
    * ``curr``        -- address of the block to process
    * ``else_addr``   -- jump target of the most recent if/else, -1 if none

    Side effects: ``ctx__`` is stored in the module-level global ``ctx``;
    ``loop_0x...`` / ``ret_0x...`` symbols are added to ``ctx.gctx.db`` for
    addresses that have no symbol yet (and ``db.modified`` is set).

    Once the walk is finished the gotos are cleaned up, every function in
    ``ctx.gctx.libarch.registered`` is run on the tree and, when
    ``ctx.gctx.color`` is set, colors are assigned.

    Returns a tuple ``(ast, ok)``.  ``ok`` is ``False`` when some addresses
    are still in the waiting list at the end; in that case a warning comment
    is inserted at the top of the tree.

    Raises ``ExcIfelse`` when both successors of a conditional jump lie
    outside the current loop.
    """
    global ctx
    ctx = ctx__

    start = time()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry, -1)]
    visited = set()
    # Filled and consulted by manage_endpoint; keyed by address here.
    waiting = {}

    # Keep the root: `ast` is rebound all along the walk.
    ast_head = ast

    # Single throw-away branch shared by every "fake" stack entry.
    fake_br = Ast_Branch()
    fake_br.level = sys.maxsize

    while stack or waiting:

        if not stack and waiting:
            # Nothing left to explore but some nodes are still waiting.
            if not ctx.gph.skipped_loops_analysis:
                break
            # Force the waiting addresses through, on the fake branch.
            for ad in set(waiting):
                waiting[ad].unseen.clear()
                stack.append((fake_br, [], -1, ad, -1))

        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop: pop every loop that does not contain
        # `curr` and continue in the parent of the popped loop.
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        # Outside of any loop: use the function-level defaults.
        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            if (l_start, curr) in ctx.gph.loops_all:
                # Only name the loop when the address has no symbol yet.
                if curr not in ctx.gctx.db.reverse_symbols:
                    name = "loop_0x%x" % curr
                    ctx.gctx.db.symbols[name] = curr
                    ctx.gctx.db.reverse_symbols[curr] = name
                    ctx.gctx.db.modified = True

                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.idx_in_parent = len(ast.nodes)
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                # From now on nodes go into the body of the new loop.
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True

        # At this point `ast`, `level` and the l_* loop variables may have
        # been updated by the code above.
        if curr in visited:
            # Jumping back to the start of the current loop needs no goto.
            if curr == l_start:
                continue

            if len(ast.nodes) > 0:
                if isinstance(ast.nodes[-1], list):
                    prev = ast.nodes[-1][0].address
                    if prev not in ctx.gph.uncond_jumps_set:
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))

            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            if curr != ctx.entry and curr not in ctx.gctx.db.reverse_symbols:
                name = "ret_0x%x" % curr
                ctx.gctx.db.symbols[name] = curr
                ctx.gctx.db.reverse_symbols[curr] = name
                ctx.gctx.db.modified = True
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.gctx.dis.jmptables:
            # Jump table: every target is explored in the same branch.
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    # The in-loop successor is pushed last, hence it is
                    # processed first.
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    continue

            # and-if
            if ctx.gctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT]))

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP]))

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    # If we have already seen this address (for example the
                    # endpoint is the beginning of the current loop) we don't
                    # re-add in the waiting list.
                    if endpoint not in visited:
                        manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                        l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append(
                (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint, so it's not useful to have an else-branch
                # -> the stack will continue on `ast`
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(endpoint))

        else:
            # Single successor: append the block and keep going.
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    fix_non_consecutives(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time()

    for func in ctx.gctx.libarch.registered:
        func(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.gctx.color:
        ctx.gctx.libarch.process_ast.assign_colors(ctx, ast)

    # Addresses still waiting at this point were never emitted: flag the
    # output as incomplete with a banner at the very top of the tree.
    if waiting:
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(
            0,
            Ast_Comment("WARNING: there is a bug, the output is incomplete !"))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        return ast, False

    return ast, True
def get_ast_branch(ctx, paths, curr_loop_idx=None, last_else=-1, endif=-1):
    """Build an ``Ast_Branch`` from the common prefix of ``paths`` onward.

    Blocks shared by all paths are appended one after the other; at the
    first loop or if/else the work is delegated to ``get_ast_loop`` /
    ``get_ast_ifelse`` and resumed at the endpoint they return.  Addresses
    already present in ``ctx.seen`` are not printed twice: a goto to them
    is emitted instead and the branch ends there.

    Parameters:
        ctx           -- context giving access to ``gph`` and ``seen``
        paths         -- paths object to consume (it is modified)
        curr_loop_idx -- indexes of the enclosing loops; defaults to a
                         fresh empty list for each call
        last_else     -- forwarded to ``get_ast_ifelse``
        endif         -- forwarded to ``get_ast_loop`` / ``get_ast_ifelse``

    Returns the generated ``Ast_Branch`` (possibly empty).
    """
    # A literal `[]` default would be one list object shared by every
    # call and by every helper it is handed to.
    if curr_loop_idx is None:
        curr_loop_idx = []

    ast = Ast_Branch()
    if_printed = False

    if paths.rm_empty_paths():
        return ast

    while True:
        ad = paths.first()
        if ad in ctx.seen:
            ast.add(Ast_Goto(ad))
            return ast

        # Stop at the first split or loop
        nb_commons, is_loop, is_ifelse, force_stop_addr = \
            paths.head_last_common(curr_loop_idx)

        if nb_commons > 0:
            common_path = paths.pop(nb_commons)

            for ad in common_path:
                ctx.seen.add(ad)
                blk = ctx.gph.nodes[ad]

                # Here if we have conditional jump, it's not a ifelse,
                # it's a condition for a loop. It will be replaced by a
                # goto. ifgoto are skipped by head_last_common.
                if ad in ctx.gph.cond_jumps_set:
                    inst = blk[0]  # first inst
                    ast.add(get_ast_ifgoto(ctx, paths, curr_loop_idx, inst))
                else:
                    ast.add(blk)

            if paths.rm_empty_paths():
                return ast

            ad = paths.first()
            if ad in ctx.seen:
                ast.add(Ast_Goto(ad))
                return ast

        # See comments in paths.__enter_new_loop
        if force_stop_addr != 0:
            ad = paths.first()
            blk = ctx.gph.nodes[ad]
            ast.add(blk)

            # Make the fall-through explicit when the block does not end
            # with an unconditional jump.
            if ad not in ctx.gph.uncond_jumps_set:
                ast.add(Ast_Goto(ctx.gph.link_out[blk[0].address][BRANCH_NEXT]))
            break

        if is_loop:
            # last_else == -1
            # -> we can't go to a same else inside a loop
            a, endpoint = get_ast_loop(ctx, paths, curr_loop_idx, -1, endif)
            ast.add(a)
        elif is_ifelse:
            a, endpoint = get_ast_ifelse(
                ctx, paths, curr_loop_idx, last_else, if_printed, endif)
            if_printed = isinstance(a, Ast_Ifelse)
            ast.add(a)
        else:
            endpoint = paths.first()

        if endpoint == -1 or paths.goto_addr(endpoint):
            break

    return ast
def get_ast_branch(ctx, paths, curr_loop_idx=None, last_else=-1):
    """Build an ``Ast_Branch`` from the common prefix of ``paths`` onward.

    Blocks shared by all paths are appended one after the other; at the
    first loop or if/else the work is delegated to ``get_ast_loop`` /
    ``get_ast_ifelse`` and resumed at the endpoint they return.  Addresses
    already present in ``ctx.seen`` are not printed twice: a goto to them
    is emitted instead and the branch ends there.

    Parameters:
        ctx           -- context giving access to ``gph`` and ``seen``
        paths         -- paths object to consume (it is modified)
        curr_loop_idx -- indexes of the enclosing loops; defaults to a
                         fresh empty list for each call
        last_else     -- forwarded to ``get_ast_ifelse``

    Returns the generated ``Ast_Branch`` (possibly empty).
    """
    # A literal `[]` default would be one list object shared by every
    # call and by every helper it is handed to.
    if curr_loop_idx is None:
        curr_loop_idx = []

    ast = Ast_Branch()
    is_if_printed = False

    if paths.rm_empty_paths():
        return ast

    while True:
        ad = paths.first()
        if ad in ctx.seen:
            ast.add(Ast_Goto(ad))
            return ast

        # Stop at the first split or loop
        nb_commons, is_loop, is_ifelse, force_stop_addr = \
            paths.head_last_common(curr_loop_idx)

        if nb_commons > 0:
            common_path = paths.pop(nb_commons)

            for ad in common_path:
                ctx.seen.add(ad)
                blk = ctx.gph.nodes[ad]

                # Here if we have conditional jump, it's not a ifelse,
                # it's a condition for a loop. It will be replaced by a
                # goto. ifgoto are skipped by head_last_common.
                if ad in ctx.gph.cond_jumps_set:
                    inst = blk[0]  # first inst
                    ast.add(get_ast_ifgoto(ctx, paths, curr_loop_idx, inst))
                else:
                    ast.add(blk)

            if paths.rm_empty_paths():
                return ast

            ad = paths.first()
            if ad in ctx.seen:
                ast.add(Ast_Goto(ad))
                return ast

        # See comments in paths.__enter_new_loop
        if force_stop_addr != 0:
            ad = paths.first()
            blk = ctx.gph.nodes[ad]
            ast.add(blk)

            # Make the fall-through explicit when the block does not end
            # with an unconditional jump.
            if ad not in ctx.gph.uncond_jumps_set:
                ast.add(Ast_Goto(
                    ctx.gph.link_out[blk[0].address][BRANCH_NEXT]))
            break

        if is_loop:
            # last_else == -1
            # -> we can't go to a same else inside a loop
            a, endpoint = get_ast_loop(ctx, paths, curr_loop_idx, -1)
            ast.add(a)
        elif is_ifelse:
            a, endpoint = get_ast_ifelse(ctx, paths, curr_loop_idx,
                                         last_else, is_if_printed)
            is_if_printed = isinstance(a, Ast_Ifelse)
            ast.add(a)
        else:
            endpoint = paths.first()

        if endpoint == -1 or paths.goto_addr(endpoint):
            break

    return ast
def generate_ast(ctx__):
    """Build the AST of the code reachable from ``ctx__.entry_addr``.

    The control-flow graph ``ctx.gph`` is walked iteratively with an explicit
    stack (last pushed, first processed).  Every stack entry is a tuple
    ``(ast, loops_stack, prev, curr, else_addr)``:

    * ``ast``         -- branch that receives the nodes produced for ``curr``
    * ``loops_stack`` -- enclosing loops, innermost last, each stored as
                         ``(loop_ast, previous_loop_start, loop_start)``
    * ``prev``        -- address we are coming from (-1 for the entry)
    * ``curr``        -- address of the block to process
    * ``else_addr``   -- jump target of the most recent if/else, -1 if none

    Side effects: ``ctx__`` is stored in the module-level global ``ctx`` and
    loop / return labels are written to ``ctx.labels``.

    Once the walk is finished the gotos are fixed up, every function in
    ``ctx.libarch.registered`` is run on the tree and, when ``ctx.color`` is
    set, colors are assigned.

    Timings reported through ``debug__`` are taken with
    ``time.perf_counter()``; ``time.clock()`` no longer exists since
    Python 3.8.

    Returns the root ``Ast_Branch``.

    Raises ``ExcIfelse`` when both successors of a conditional jump lie
    outside the current loop.
    """
    global ctx
    ctx = ctx__

    start = time.perf_counter()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry_addr, -1)]
    visited = set()
    # Filled and consulted by manage_endpoint; keyed by address here.
    waiting = {}

    # Keep the root: `ast` is rebound all along the walk.
    ast_head = ast

    while stack:
        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry_addr

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        # Check if we have already an other equivalent loop in waiting.
        if (l_start, curr) in ctx.gph.equiv:
            eq = ctx.gph.equiv[(l_start, curr)]
            dont_enter = False
            for ad in waiting:
                for i in waiting[ad].loop_start:
                    if (i, ad) in eq:
                        dont_enter = True
                        break
                if dont_enter:
                    break
            if dont_enter:
                # Restart main loop
                continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop: pop every loop that does not contain
        # `curr` and continue in the parent of the popped loop.
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        # Outside of any loop: use the function-level defaults.
        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry_addr
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            is_new_loop = True
            if (l_start, curr) not in ctx.gph.loops_all:
                is_new_loop = False
            else:
                # Check that it is not equivalent to the current loop
                if loops_stack:
                    l_ast, l_prev_loop, l_start = loops_stack[-1]
                    if (l_prev_loop, curr) in ctx.gph.equiv and \
                            (l_prev_loop, l_start) in ctx.gph.equiv[(l_prev_loop, curr)]:
                        is_new_loop = False

            if is_new_loop:
                ctx.labels[curr] = "loop_0x%x" % curr
                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                # From now on nodes go into the body of the new loop.
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True

        # At this point `ast`, `level` and the l_* loop variables may have
        # been updated by the code above.
        if curr in visited:
            # Jumping back to the start of the current loop needs no goto.
            if curr == l_start:
                continue

            if len(ast.nodes) > 0:
                if not isinstance(ast.nodes[-1], list):
                    ast.add(Ast_Goto(curr))
                else:
                    # No goto after a block that already ends with an
                    # unconditional jump.
                    prev_inst = ast.nodes[-1][0]
                    if not ctx.libarch.utils.is_uncond_jump(prev_inst):
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))

            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            ctx.labels[curr] = "ret_0x%x" % curr
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.dis.jmptables:
            # Jump table: every target is explored in the same branch.
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    # The in-loop successor is pushed last, hence it is
                    # processed first.
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    ast.add(a)
                    continue

            # and-if
            if ctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    ast.add(a)

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP], prefetch)
                    a.parent = ast
                    ast.add(a)

                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                    l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append(
                (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint: the jump target continues on `ast`.
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            # Common to the three cases above.
            a.parent = ast
            a.level = level + 1
            ast.add(a)

        else:
            # Single successor: append the block and keep going.
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    add_goto_after_alone_andif(ast)
    add_goto_if_inst_not_consecutives(ctx, ast)

    elapsed = time.perf_counter()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time.perf_counter()

    for func in ctx.libarch.registered:
        func(ctx, ast)

    elapsed = time.perf_counter()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast
def get_ast_branch(paths, curr_loop_idx=None, last_else=-1, endif=-1):
    """Build an ``Ast_Branch`` from the common prefix of ``paths`` onward.

    Blocks shared by all paths are appended one after the other; at the
    first loop or if/else the work is delegated to ``get_ast_loop`` /
    ``get_ast_ifelse`` and resumed at the endpoint they return.

    Parameters:
        paths         -- paths object to consume (it is modified)
        curr_loop_idx -- indexes of the enclosing loops; defaults to a
                         fresh empty list for each call
        last_else     -- forwarded to ``get_ast_ifelse``
        endif         -- forwarded to ``get_ast_loop`` / ``get_ast_ifelse``

    Returns the generated ``Ast_Branch`` (possibly empty).
    """
    # A literal `[]` default would be one list object shared by every
    # call and by every helper it is handed to.
    if curr_loop_idx is None:
        curr_loop_idx = []

    ast = Ast_Branch()
    if_printed = False

    while True:
        if paths.rm_empty_paths():
            break

        # Stop on the first split or is_loop
        until, is_loop, is_ifelse, force_stop_addr = \
            paths.head_last_common(curr_loop_idx)

        # Add code to the branch, and update paths
        # until == -1 if there is no common point at the beginning
        last = -1

        while last != until:
            blk = gph.nodes[paths.first()]
            inst = blk[0]  # first inst

            # Here if we have conditional jump, it's not a ifelse,
            # it's a condition for a loop. It will be replaced by a
            # goto. ifgoto are skipped by head_last_common.
            if is_cond_jump(inst):
                ast.add(get_ast_ifgoto(paths, curr_loop_idx, inst))
            else:
                ast.add(blk)

            last = paths.pop()

        if paths.rm_empty_paths():
            break

        if force_stop_addr != 0:
            blk = gph.nodes[paths.first()]
            ast.add(blk)

            # Make the fall-through explicit when the block does not end
            # with an unconditional jump.
            if not is_uncond_jump(blk[0]):
                ast.add(Ast_Jmp(gph.link_out[blk[0].address][BRANCH_NEXT]))
            break

        if is_loop:
            # last_else == -1
            # -> we can't go to a same else inside a loop
            a, endpoint = get_ast_loop(paths, curr_loop_idx, -1, endif)
            ast.add(a)
        elif is_ifelse:
            a, endpoint = get_ast_ifelse(paths, curr_loop_idx, last_else,
                                         if_printed, endif)
            if_printed = isinstance(a, Ast_Ifelse)
            ast.add(a)
        else:
            endpoint = paths.first()

        if endpoint == -1:
            break

        paths.goto_addr(endpoint)

    return ast