def add_goto_if_inst_not_consecutives(ctx, ast):
    """Insert a goto between two adjacent blocks that are not contiguous.

    The AST is walked recursively. Inside an ``Ast_Branch``, whenever two
    instruction blocks (plain lists) follow each other and the address right
    after the last instruction of the first block is not the address of the
    first instruction of the second block, an ``Ast_Goto`` to that following
    address is inserted between them. A pair is only checked when the last
    instruction of the first block is not in ``ctx.gph.uncond_jumps_set``
    and is a key of ``ctx.gph.link_out``.

    The AST is modified in place; nothing is returned.
    """
    if isinstance(ast, Ast_Branch):
        prev_blk = None
        idx_to_add = {}

        for i, n in enumerate(ast.nodes):
            if isinstance(n, list):
                if prev_blk is not None and \
                        prev_blk[-1].address not in ctx.gph.uncond_jumps_set and \
                        prev_blk[-1].address in ctx.gph.link_out:
                    nxt = prev_blk[-1].address + prev_blk[-1].size
                    if nxt != n[0].address:
                        idx_to_add[i] = nxt
                prev_blk = n
            else:
                # A nested node breaks the "two adjacent blocks" sequence.
                prev_blk = None
                add_goto_if_inst_not_consecutives(ctx, n)

        # Insert from the end of the nodes list: the recorded indexes refer
        # to the list as it was during the scan, and inserting at a low
        # index first would shift every higher index by one.
        for i in sorted(idx_to_add, reverse=True):
            ast.nodes.insert(i, Ast_Goto(idx_to_add[i]))

    elif isinstance(ast, Ast_Ifelse):
        add_goto_if_inst_not_consecutives(ctx, ast.br_next)
        add_goto_if_inst_not_consecutives(ctx, ast.br_next_jump)

    elif isinstance(ast, Ast_Loop):
        add_goto_if_inst_not_consecutives(ctx, ast.branch)
def fix_non_consecutives(ctx, ast):
    """Add a goto where a block's fall-through target is not the next block.

    Recursively visits the AST. In an ``Ast_Branch``, for every pair of
    directly adjacent instruction blocks, the ``BRANCH_NEXT`` successor of
    the first instruction of the earlier block is looked up in
    ``ctx.gph.link_out``; if it differs from the address of the first
    instruction of the later block, an ``Ast_Goto`` to that successor is
    inserted between the two. The AST is modified in place.
    """
    if not isinstance(ast, Ast_Branch):
        if isinstance(ast, Ast_Ifelse):
            fix_non_consecutives(ctx, ast.br_next)
            fix_non_consecutives(ctx, ast.br_next_jump)
        elif isinstance(ast, Ast_Loop):
            fix_non_consecutives(ctx, ast.branch)
        return

    # (position, goto target) pairs, collected in ascending position order.
    pending = []
    previous = None

    for pos, node in enumerate(ast.nodes):
        if not isinstance(node, list):
            # A nested node separates blocks: forget the previous one.
            previous = None
            fix_non_consecutives(ctx, node)
            continue

        if previous is not None:
            head = previous[0].address
            if head not in ctx.gph.uncond_jumps_set and \
                    head in ctx.gph.link_out:
                target = ctx.gph.link_out[head][BRANCH_NEXT]
                if target != node[0].address:
                    pending.append((pos, target))

        previous = node

    # Insert starting from the highest position so that the positions
    # recorded during the scan stay valid.
    for pos, target in reversed(pending):
        ast.nodes.insert(pos, Ast_Goto(target))
def rm_waiting(ctx, waiting, ad):
    """Release the endpoint `ad` from `waiting` and pick the ast to continue.

    Among the asts registered in ``waiting[ad]``, the one with the smallest
    ``level`` is selected (the first one on ties). Entries whose
    ``(loop_start, ad)`` pair is in ``ctx.gph.false_loops`` are ignored.
    Every other ast may receive a trailing ``Ast_Goto``. Finally the entry
    is removed from `waiting` and the selected ast is returned.

    If no candidate exists, an error message is printed and the process
    exits with status 1.
    """
    entry = waiting[ad]
    candidates_ast = entry.ast
    starts = entry.loop_start

    eligible = [
        idx for idx in range(len(candidates_ast))
        if (starts[idx], ad) not in ctx.gph.false_loops
    ]

    if not eligible:
        print("errorD: this is a bug, please report")
        sys.exit(1)

    # min() keeps the first of several equal minima.
    best = min(eligible, key=lambda idx: candidates_ast[idx].level)
    ast = candidates_ast[best]

    # Put a goto at the end of every other ast. Useless ones are expected
    # to be deleted later (see remove_unnecessary_goto).
    for idx, other in enumerate(candidates_ast):
        if idx == best:
            continue

        target = ad

        if other.nodes:
            last = other.nodes[-1]
            if isinstance(last, list):
                head = last[0].address
                if head in ctx.gph.uncond_jumps_set:
                    continue
                if head in ctx.gph.link_out:
                    # The block does not fall through to `ad`: jump to its
                    # real successor instead.
                    fallthrough = ctx.gph.link_out[head][BRANCH_NEXT]
                    if fallthrough != ad:
                        target = fallthrough
            elif isinstance(last, Ast_Goto):
                # Already ends with a goto, nothing to add.
                continue

        other.add(Ast_Goto(target))

    waiting[ad].ast.clear()
    del waiting[ad]

    return ast
def manage_endpoint(ctx, waiting, ast, prev, ad, l_set, l_prev_loop,
                    l_start, ad_is_visited):
    """Decide whether the walk may continue on node `ad` or must wait.

    Returns the ast on which the walk continues, or None when the caller
    has to stop here and wait for the other incoming links of `ad`.

    - If `ad` is not a key of ``ctx.gph.link_in`` or has at most one
      incoming link, `ast` is returned unchanged.
    - If `ad_is_visited` is False, the call only pre-registers `ad` in
      `waiting` (when absent) and returns None; `prev` is meaningless then.
    - If `ad` is already waiting, the arrival is recorded with
      ``rendezvous``. Once no link is left unseen, the registered ast with
      the smallest level is returned, every other registered ast that is
      still empty gets an ``Ast_Goto(ad)``, and the entry is removed.
    - Otherwise a new waiting entry is created when more than one incoming
      link is unseen.
    """
    incoming = ctx.gph.link_in
    if ad not in incoming or len(incoming[ad]) <= 1:
        return ast

    # This is only a prevision for a future visit of the node.
    if not ad_is_visited:
        if ad not in waiting:
            links = get_unseen_links_in(ad, l_set, l_prev_loop, l_start)
            waiting[ad] = Endpoint(ast, links, l_start)
        return None

    if ad not in waiting:
        links = get_unseen_links_in(ad, l_set, l_prev_loop, l_start)
        if len(links) > 1:
            links.remove(prev)
            waiting[ad] = Endpoint(ast, links, l_start)
            return None
        return ast

    waiting[ad].rendezvous(ast, prev, l_start)

    if len(waiting[ad].unseen) != 0:
        return None

    # Choose the ast with the smallest level, skipping false loops.
    registered = waiting[ad].ast
    starts = waiting[ad].loop_start
    best = -1

    for idx, candidate in enumerate(registered):
        if (starts[idx], ad) in ctx.gph.false_loops:
            continue
        if best == -1 or candidate.level < registered[best].level:
            best = idx

    if best == -1:
        print("errorD: this is a bug, please report")
        sys.exit(1)

    ast = registered[best]

    # The other asts that are still empty jump explicitly to the endpoint.
    for idx, other in enumerate(registered):
        if idx == best:
            continue
        if len(other.nodes) == 0:
            other.add(Ast_Goto(ad))

    waiting[ad].ast.clear()
    del waiting[ad]

    return ast
def add_goto_after_alone_andif(ast):
    """Append a goto after an and-if that is the last node of a branch.

    Walks the AST recursively. When the last node of an ``Ast_Branch`` is
    an ``Ast_AndIf``, an ``Ast_Goto`` to its ``expected_next_addr`` is
    added at the end of that branch. The AST is modified in place.
    """
    if isinstance(ast, Ast_Loop) and not isinstance(ast, (Ast_Branch, Ast_Ifelse)):
        add_goto_after_alone_andif(ast.branch)
        return

    if isinstance(ast, Ast_Ifelse) and not isinstance(ast, Ast_Branch):
        add_goto_after_alone_andif(ast.br_next)
        add_goto_after_alone_andif(ast.br_next_jump)
        return

    if not isinstance(ast, Ast_Branch):
        return

    if ast.nodes:
        tail = ast.nodes[-1]
        if isinstance(tail, Ast_AndIf):
            ast.add(Ast_Goto(tail.expected_next_addr))

    # Instruction blocks are plain lists: only nested nodes are visited.
    for child in ast.nodes:
        if isinstance(child, list):
            continue
        add_goto_after_alone_andif(child)
def fix_non_consecutives(ctx, ast):
    """Add a goto after each block whose successor is not what follows it.

    Recursively visits the AST. For every instruction block of an
    ``Ast_Branch`` whose first instruction is not in
    ``ctx.gph.uncond_jumps_set`` and is a key of ``ctx.gph.link_out``, the
    ``BRANCH_NEXT`` successor is compared with the address that follows the
    block in the AST:

    - for the last node of the branch, the value returned by
      ``is_last_in_loop`` when it is not -1, otherwise the value returned
      by ``get_next_addr``;
    - for any other node, the value returned by ``get_first_addr`` on the
      next node.

    When they differ, an ``Ast_Goto`` to the successor is inserted right
    after the block. The AST is modified in place.
    """
    if not isinstance(ast, Ast_Branch):
        if isinstance(ast, Ast_Ifelse):
            fix_non_consecutives(ctx, ast.br_next)
            fix_non_consecutives(ctx, ast.br_next_jump)
        elif isinstance(ast, Ast_Loop):
            fix_non_consecutives(ctx, ast.branch)
        return

    # (position, goto target) pairs, collected in ascending position order.
    pending = []

    for pos, node in enumerate(ast.nodes):
        if not isinstance(node, list):
            fix_non_consecutives(ctx, node)
            continue

        head = node[0].address
        if head in ctx.gph.uncond_jumps_set or head not in ctx.gph.link_out:
            continue

        real_next = ctx.gph.link_out[head][BRANCH_NEXT]

        if pos == len(ast.nodes) - 1:
            loop_start = is_last_in_loop(ast, pos)
            if loop_start != -1:
                if real_next != loop_start:
                    pending.append((pos + 1, real_next))
                continue
            shown_next = get_next_addr(ast)
        else:
            shown_next = get_first_addr(ast.nodes[pos + 1])

        if real_next != shown_next:
            pending.append((pos + 1, real_next))

    # Insert starting from the highest position so that the positions
    # recorded during the scan stay valid.
    for pos, target in reversed(pending):
        ast.nodes.insert(pos, Ast_Goto(target))
def extract_loop_paths(self, curr_loop_idx, last_loop_idx, endif):
    """Split the paths into the body of a loop and its end-of-loop groups.

    Returns a tuple ``(loop_paths, list_grp_endloops, endloops_start)``:

    - ``loop_paths``: a Paths object with the paths kept by ``__keep_path``;
    - ``list_grp_endloops``: a list of end-of-loop groups (Paths objects,
      possibly interleaved with ``Ast_Goto`` nodes in the multi-group case);
    - ``endloops_start``: the set of first addresses of the end-of-loop
      groups as computed before any cut on common endpoints.

    `endif` is an address, or -1 when there is none; it is used to detect
    groups whose next address is that address.
    """
    # TODO optimize....

    loop_paths = Paths(self.gph)
    tmp_endloops = Paths(self.gph)

    # ------------------------------------------------------
    # Distinction of loop-paths / endloops
    # ------------------------------------------------------

    for k, p in self.paths.items():
        keep, ignore = self.__keep_path(curr_loop_idx, p, k)
        if not ignore:
            if keep:
                loop_paths.add_path(k, p, self.__get_loop_idx(k))
            else:
                tmp_endloops.add_path(k, p, self.__get_loop_idx(k))

    # Remove the beginning of the loop to get only the endloop
    for k, el in tmp_endloops.paths.items():
        for i, addr in enumerate(el):
            if addr not in loop_paths:
                p = el[i:]
                # A tail that is already stored for another key is dropped
                # (replaced by an empty path, removed just below).
                if not p in tmp_endloops.paths.values():
                    tmp_endloops.paths[k] = p
                else:
                    tmp_endloops.paths[k] = []
                break

    tmp_endloops.rm_empty_paths()

    # ------------------------------------------------------
    # Regroup paths if they start with the same addr
    # ------------------------------------------------------

    grp_endloops = {}

    for k, el in tmp_endloops.paths.items():
        if el[0] not in grp_endloops:
            grp_endloops[el[0]] = Paths(self.gph)

        grp_endloops[el[0]].add_path(k, el, tmp_endloops.__get_loop_idx(k))

    # ------------------------------------------------------
    # Just store the beginning of each endloop. It will
    # be returned by the function. We need it for printing
    # a comment "endloop NUMBER". Later we add more endloops
    # due to common endpoints.
    # ------------------------------------------------------

    endloops_start = {ad for ad in grp_endloops}

    # debug__("endloops_start")
    # debug__(endloops_start)

    # With zero or one group there is nothing to cut or to sort.
    if len(grp_endloops) <= 1:
        return loop_paths, list(grp_endloops.values()), endloops_start

    # ------------------------------------------------------
    # Endpoints bruteforce between all paths
    # Searching an endpoint is used to avoid common address
    # between two paths. A path will be cut at this endpoint.
    # ------------------------------------------------------

    # Return the first address common to the groups p1 and p2, or -1.
    def search_first_common(loops_idx, p1, p2):
        # TODO hack...
        if p1.are_all_looping(-1, False, loops_idx) or \
                p2.are_all_looping(-1, False, loops_idx):
            return -1
        # TODO optimize
        tmp = Paths(self.gph)
        tmp.paths.update(p1.paths)
        tmp.paths.update(p2.paths)
        tmp.looping.update(p1.looping)
        tmp.looping.update(p2.looping)
        return tmp.first_common(loops_idx)

    # Check if the address n is the next address of g
    def has_next(g, n):
        for k, p in g.paths.items():
            nxt = self.gph_link_out[p[-1]]
            if len(nxt) == 1 and nxt[BRANCH_NEXT] == n:
                return True
        return False

    grp2_keys = set(grp_endloops.keys())
    all_endpoints = {}  # endpoint address -> set of group addresses

    for ad1, els1 in grp_endloops.items():
        # Optimization to not compare twice two sets (for
        # example g1 with g2 g2 with g1).
        grp2_keys.remove(ad1)

        for ad2 in grp2_keys:
            els2 = grp_endloops[ad2]

            endpoint = search_first_common(last_loop_idx, els1, els2)
            # print("endpoint: ", hex(ad1), hex(ad2), "=", hex(endpoint))

            if endpoint != -1:
                if endpoint not in all_endpoints:
                    all_endpoints[endpoint] = set()
                all_endpoints[endpoint].add(ad1)
                all_endpoints[endpoint].add(ad2)

    # If we have all endloops at the end of an if, there will
    # be no endpoints between them (the endpoint is outside).
    # So check all groups if the next is the "endif".
    if endif != -1 and endif not in grp_endloops:
        # Add a fake group
        for ad, els in grp_endloops.items():
            if has_next(els, endif):
                if endif not in all_endpoints:
                    all_endpoints[endif] = set()
                all_endpoints[endif].add(ad)

        grp_endloops[endif] = Paths(self.gph)
        grp_endloops[endif].paths[-1] = [endif]

    # ------------------------------------------------------
    # Compute endpoints dependencies
    # A path can contain multiple endpoints with multiple
    # paths. So we need to check which endpoint is the first.
    # ------------------------------------------------------

    rev_depends_on = {}  # endpoint -> set of endpoints
    edp2_keys = list(all_endpoints.keys())
    has_no_dep = set(all_endpoints.keys())

    for edp1, adset1 in all_endpoints.items():
        # Optimization to not compare twice two sets
        edp2_keys.remove(edp1)

        for edp2 in edp2_keys:
            adset2 = all_endpoints[edp2]

            if adset1.issubset(adset2):
                all_endpoints[edp2] -= adset1
                if edp1 not in rev_depends_on:
                    rev_depends_on[edp1] = {edp2}
                else:
                    rev_depends_on[edp1].add(edp2)
                if edp2 in has_no_dep:
                    has_no_dep.remove(edp2)

            elif adset2.issubset(adset1):
                all_endpoints[edp1] -= adset2
                if edp2 not in rev_depends_on:
                    rev_depends_on[edp2] = {edp1}
                else:
                    rev_depends_on[edp2].add(edp1)
                if edp1 in has_no_dep:
                    has_no_dep.remove(edp1)

    # Now remove indirect dependencies
    # For example if we have : e1 -> e2 -> e3
    # e1 has a dependence inverse with [e2,e3]
    # Here we just want to keep e2.
    e2_keys = list(rev_depends_on.keys())
    for e1, s1 in rev_depends_on.items():
        # Optimization to not compare twice two sets
        e2_keys.remove(e1)

        for e2 in e2_keys:
            s2 = rev_depends_on[e2]
            if s1.issubset(s2):
                rev_depends_on[e2] -= s1
            elif s2.issubset(s1):
                rev_depends_on[e1] -= s2

    # debug__("all_endpoints endpoint: address")
    # debug__(all_endpoints)
    # debug__("endpoints without dependencies")
    # debug__(has_no_dep)
    # debug__("rev_depends_on")
    # debug__(rev_depends_on)

    # ------------------------------------------------------
    # Search which endpoints we must see first. A path can
    # contain multiple endpoints with other paths.
    # ------------------------------------------------------

    endpoints_sort = []
    seen = set()

    # Depth-first traversal of rev_depends_on starting from e.
    def rec(e):
        endpoints_sort.append(e)
        seen.add(e)
        if e not in rev_depends_on:
            return
        for rev_e in rev_depends_on[e]:
            if rev_e not in seen:
                rec(rev_e)

    for e in has_no_dep:
        rec(e)

    # debug__("endpoints_sort")
    # debug__(endpoints_sort)

    # ------------------------------------------------------
    # Cut paths to avoid duplicate code and create new
    # groups. Paths are cut at each endpoint.
    # ------------------------------------------------------

    prev_cut_idx = {}
    for k in tmp_endloops.paths:
        prev_cut_idx[k] = 0

    # Function to cut each path of the group g. Because we can
    # have multiple endpoints in one path, prev_cut_idx is used
    # to store the last index of the previous endpoint.
    # All paths are cut like this : [prev_cut_idx:endpoint]
    # or [index(force_start_e):next_endpoint]
    # Returns the new Paths and a boolean telling whether every cut path
    # ends with an unconditional jump to e.
    def cut_path(g, e, force_start_e=-1):
        els = grp_endloops[g]
        newp = Paths(self.gph)
        all_finish_by_jump = True

        for k, p in els.paths.items():
            if force_start_e != -1:
                start = index(p, force_start_e)
            else:
                start = prev_cut_idx[k]

            stop = -1 if e == -1 else index(p, e)
            if stop == -1:
                stop = len(p)

            # NOTE(review): in this case `start` was just read from
            # prev_cut_idx[k], so this assignment leaves the value
            # unchanged. `stop` may have been intended -- to confirm.
            if force_start_e == -1:
                prev_cut_idx[k] = start

            # The loop index is only kept when the cut reaches the end
            # of the path.
            loop_idx = -1
            if stop == len(p):
                loop_idx = els.__get_loop_idx(k)

            # Avoid a copy when the whole path is taken.
            if start == 0 and stop == len(p):
                p2 = p
            else:
                p2 = p[start:stop]

            if not p2:
                continue

            newp.add_path(k, p2, loop_idx)

            # If it's an internal loop, we don't have to check
            # if the last instruction is a jump.
            if els.__is_looping(k, last_loop_idx):
                continue

            # Check if the last instruction is a jump and
            # go to the endpoint.
            if p[stop - 1] in self.gph_link_out:
                nxt = self.gph_link_out[p[stop - 1]]
                # TODO need to check cond jumps ?
                if not(len(nxt) == 1 and \
                       p[stop-1] in self.gph_uncond_jumps_set and \
                       nxt[BRANCH_NEXT] == e):
                    all_finish_by_jump = False
            else:
                # It's a return, there is nothing after. It must be
                # in the future dict 'next_no_jump'.
                all_finish_by_jump = False

        return newp, all_finish_by_jump

    # List of group-Paths. All paths have a jump at the end.
    with_jump = []

    # Contains the next address of a group. These groups
    # must be sorted.
    next_no_jump = {}  # group_addr -> next_address
    saved_paths = {}  # group_addr -> Paths

    seen_endloops = set()

    # All groups are recreated. They are copied to saved_paths
    # or with_jump.
    for i, e in enumerate(endpoints_sort):
        # Cut paths to get the beginning of the endpoint until
        # the end or the next endpoint.

        # Check if the next endpoint is a dependence of the
        # current. It means that these two endpoints are in
        # a same group.
        next_e = -1
        if i + 1 < len(endpoints_sort):
            tmp_e = endpoints_sort[i + 1]
            if e in rev_depends_on and tmp_e in rev_depends_on[e]:
                next_e = tmp_e

        if e in grp_endloops:
            # TODO optimize by avoiding the copy of
            # grp_endloops[e] if next_e == -1
            # -> until the end
            newp, all_finish_by_jump = cut_path(e, next_e, force_start_e=e)
            seen_endloops.add(e)
        else:
            # Take one group, it doesn't matter which one it is.
            # If one group contains the endpoint e, all paths must
            # be in g.
            g = next(iter(all_endpoints[e]))

            # TODO optimize by avoiding the copy of
            # grp_endloops[e] if next_e == -1
            # -> until the end
            newp, all_finish_by_jump = cut_path(g, next_e, force_start_e=e)
            seen_endloops.add(g)

        if all_finish_by_jump:
            # print("4 ---->", hex(newp.first()), hex(e), hex(next_e))
            with_jump.append(newp)
        else:
            # print("3 ---->", hex(newp.first()), hex(e), hex(next_e))
            next_no_jump[e] = next_e
            saved_paths[e] = newp

        # Now cut all paths until the endpoint. If a previous
        # endpoint was in the group, we start to cut at this
        # one (see prev_cut_idx).
        for g in all_endpoints[e]:
            # This avoids duplicating endpoints which are at
            # the same time the beginning of a group.
            if g != e:
                newp, all_finish_by_jump = cut_path(g, e)

                if all_finish_by_jump:
                    # print("2 ---->", hex(newp.first()), hex(e))
                    with_jump.append(newp)
                else:
                    # print("1 ---->", hex(newp.first()), hex(e))
                    head = newp.first()
                    next_no_jump[head] = e
                    saved_paths[head] = newp

                seen_endloops.add(g)

    # ------------------------------------------------------
    # Sort endloops.
    # ------------------------------------------------------

    list_grp_endloops = []

    # It's possible that a path has no endpoints with others.
    # For example if we have an infinite loop in the loop.
    # Or if these paths are at the end of an if (tests/server).
    # debug__(endloops_start)
    # debug__(seen_endloops)
    other_paths = endloops_start - seen_endloops
    for ad in other_paths:
        list_grp_endloops.append(grp_endloops[ad])

    # Because all these paths finish with a jump, the order
    # is not important.
    for els in with_jump:
        if len(els.paths) > 0:
            list_grp_endloops.append(els)

    # Now we must order these paths. They have a direct access to
    # the next group (no jump), so we must sort them.
    endloops_sort = []

    # Just for a better output, we sort the addresses. We want that
    # the last endloop is the "real last". get_ast_loop will return
    # endloops[-1]. We assume that the last in no_dep has a longer
    # path than the first one.
    el_with_dep = {n for n in next_no_jump.values() if n != -1}
    el_no_dep = list(next_no_jump.keys() - el_with_dep)
    el_no_dep.sort()

    # debug__(el_no_dep)
    # debug__(next_no_jump)

    # Follow each chain group -> next group until it ends (-1).
    for ad in el_no_dep:
        n = ad
        while n != -1:
            if n != endif:
                endloops_sort.append(n)
            n = next_no_jump[n]

    # debug__(endloops_sort)

    el_seen = set()
    for i, ad in enumerate(endloops_sort):
        # Sometimes it's not possible to merge endpoints due to some goto.
        # (tests/goto4). If a Path is duplicated it may crash the program
        # because a path is modified when a branch is traversed. The solution
        # is to make a copy of the object.
        if ad in el_seen:
            list_grp_endloops.append(saved_paths[ad].copy())
        else:
            list_grp_endloops.append(saved_paths[ad])
            el_seen.add(ad)

        # This is a HACK.
        # It's possible that endloops were not correctly sorted (due to
        # weird gotos). So check that and add a goto, if the next is not the
        # one expected. (tests/goto5)
        nxt = next_no_jump[ad]
        if nxt != -1 and i < len(
                endloops_sort) - 1 and nxt != endloops_sort[i + 1]:
            list_grp_endloops.append(Ast_Goto(nxt))

    return loop_paths, list_grp_endloops, endloops_start
def generate_ast(ctx__):
    """Build the AST of the code reachable from ``ctx.entry``.

    The graph ``ctx.gph`` is walked with an explicit stack. Each stack entry
    is a tuple ``(ast, loops_stack, prev, curr, else_addr)``: the branch
    being filled, the stack of enclosing loops as ``(loop_ast, prev_loop,
    loop_start)`` tuples, the previous address, the address to visit and
    the address of the current else.

    Returns ``(ast, True)`` when the walk finished with no node left in
    `waiting`, otherwise ``(ast, False)`` with warning comments inserted at
    the beginning of the AST.

    Raises ExcIfelse when both successors of a conditional jump are outside
    the current loop.

    Side effects visible here: rebinds the module-level ``ctx`` and adds
    "loop_0x..." / "ret_0x..." symbols to ``ctx.gctx.db``.
    """
    global ctx
    ctx = ctx__

    start = time()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    # Branch with the highest possible level: manage_endpoint selects the
    # ast with the smallest level, so this one is chosen last.
    fake_br = Ast_Branch()
    fake_br.level = sys.maxsize

    while stack or waiting:

        # Nothing left to visit but some nodes are still waiting: either
        # give up, or force them when the loops analysis was skipped.
        if not stack and waiting:
            if not ctx.gph.skipped_loops_analysis:
                break
            for ad in set(waiting):
                waiting[ad].unseen.clear()
                stack.append((fake_br, [], -1, ad, -1))

        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            if (l_start, curr) in ctx.gph.loops_all:
                if curr not in ctx.gctx.db.reverse_symbols:
                    name = "loop_0x%x" % curr
                    ctx.gctx.db.symbols[name] = curr
                    ctx.gctx.db.reverse_symbols[curr] = name
                    ctx.gctx.db.modified = True

                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.idx_in_parent = len(ast.nodes)
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True

        # Here curr may has changed

        if curr in visited:
            # Jump back to the start of the current loop: nothing to add.
            if curr == l_start:
                continue
            if len(ast.nodes) > 0:
                # After a block, a goto is only needed if the block does
                # not start with an unconditional jump.
                if isinstance(ast.nodes[-1], list):
                    prev = ast.nodes[-1][0].address
                    if prev not in ctx.gph.uncond_jumps_set:
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            if curr != ctx.entry and curr not in ctx.gctx.db.reverse_symbols:
                name = "ret_0x%x" % curr
                ctx.gctx.db.symbols[name] = curr
                ctx.gctx.db.reverse_symbols[curr] = name
                ctx.gctx.db.modified = True
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.gctx.dis.jmptables:
            # Jump table: every target is visited from the same branch.
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    # The node inside the loop is pushed last, so it is
                    # popped (visited) first.
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    continue

            # and-if
            if ctx.gctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.gctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT]))

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.gctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP],
                                  prefetch)
                    a.parent = ast
                    a.idx_in_parent = len(ast.nodes)
                    ast.add(a)
                    ast.add(Ast_Goto(nxt[BRANCH_NEXT_JUMP]))

                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    # If we have already seen this address (for example the
                    # endpoint is the beginning of the current loop) we don't
                    # re-add in the waiting list.
                    if endpoint not in visited:
                        manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                        l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append(
                (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint, so it's not useful to have an else-branch
                # -> the stack will continue on `ast`
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

                a.parent = ast
                a.level = level + 1
                a.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast.add(Ast_Goto(endpoint))

        else:
            # Single successor: add the block and continue on the next one.
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    fix_non_consecutives(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time()

    for func in ctx.gctx.libarch.registered:
        func(ctx, ast)

    elapsed = time()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.gctx.color:
        ctx.gctx.libarch.process_ast.assign_colors(ctx, ast)

    # Nodes are still waiting: the main loop stopped before visiting them.
    # Each insert is done at index 0, so the final order is reversed.
    if waiting:
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(
            0,
            Ast_Comment("WARNING: there is a bug, the output is incomplete !"))
        ast_head.nodes.insert(0, Ast_Comment(""))
        ast_head.nodes.insert(0, Ast_Comment(""))
        return ast, False

    return ast, True
def get_ast_branch(ctx, paths, curr_loop_idx=None, last_else=-1):
    """Build an ``Ast_Branch`` by consuming `paths`.

    Parameters:
        ctx: context object; ``ctx.seen`` (set of addresses already
            printed) is updated, ``ctx.gph`` is read.
        paths: Paths object; it is consumed (popped) by this function.
        curr_loop_idx: value forwarded unchanged to
            ``paths.head_last_common``, ``get_ast_ifgoto``, ``get_ast_loop``
            and ``get_ast_ifelse``. Defaults to a new empty list for each
            call.
        last_else: address forwarded to ``get_ast_ifelse``.

    Returns the new ``Ast_Branch``.
    """
    # A fresh list per call: a mutable default value would be one single
    # object shared by every call that relies on the default.
    if curr_loop_idx is None:
        curr_loop_idx = []

    ast = Ast_Branch()
    is_if_printed = False

    if paths.rm_empty_paths():
        return ast

    while 1:
        ad = paths.first()
        if ad in ctx.seen:
            ast.add(Ast_Goto(ad))
            return ast

        # Stop at the first split or loop
        nb_commons, is_loop, is_ifelse, force_stop_addr = \
            paths.head_last_common(curr_loop_idx)

        if nb_commons > 0:
            common_path = paths.pop(nb_commons)

            for ad in common_path:
                ctx.seen.add(ad)
                blk = ctx.gph.nodes[ad]

                # Here if we have conditional jump, it's not a ifelse,
                # it's a condition for a loop. It will be replaced by a
                # goto. ifgoto are skipped by head_last_common.
                if ad in ctx.gph.cond_jumps_set:
                    inst = blk[0]  # first inst
                    ast.add(get_ast_ifgoto(ctx, paths, curr_loop_idx, inst))
                else:
                    ast.add(blk)

            if paths.rm_empty_paths():
                return ast

            ad = paths.first()
            if ad in ctx.seen:
                ast.add(Ast_Goto(ad))
                return ast

        # See comments in paths.__enter_new_loop
        if force_stop_addr != 0:
            ad = paths.first()
            blk = ctx.gph.nodes[ad]
            ast.add(blk)

            if ad not in ctx.gph.uncond_jumps_set:
                ast.add(Ast_Goto(
                    ctx.gph.link_out[blk[0].address][BRANCH_NEXT]))
            break

        if is_loop:
            # last_else == -1
            # -> we can't go to a same else inside a loop
            a, endpoint = get_ast_loop(ctx, paths, curr_loop_idx, -1)
            ast.add(a)
        elif is_ifelse:
            a, endpoint = get_ast_ifelse(ctx, paths, curr_loop_idx,
                                         last_else, is_if_printed)
            is_if_printed = isinstance(a, Ast_Ifelse)
            ast.add(a)
        else:
            endpoint = paths.first()

        if endpoint == -1 or paths.goto_addr(endpoint):
            break

    return ast
def generate_ast(ctx__):
    """Build and return the AST of the code reachable from ``ctx.entry_addr``.

    The graph ``ctx.gph`` is walked with an explicit stack. Each stack entry
    is a tuple ``(ast, loops_stack, prev, curr, else_addr)``: the branch
    being filled, the stack of enclosing loops as ``(loop_ast, prev_loop,
    loop_start)`` tuples, the previous address, the address to visit and
    the address of the current else.

    After the walk, gotos are cleaned up / added and every function of
    ``ctx.libarch.registered`` is applied to the AST.

    Raises ExcIfelse when both successors of a conditional jump are outside
    the current loop.

    Side effects visible here: rebinds the module-level ``ctx`` and adds
    "loop_0x..." / "ret_0x..." entries to ``ctx.labels``.
    """
    global ctx
    ctx = ctx__

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry_addr, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    while stack:
        ast, loops_stack, prev, curr, else_addr = stack.pop(-1)

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry_addr

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        # Check if we have already an other equivalent loop in waiting.
        if (l_start, curr) in ctx.gph.equiv:
            eq = ctx.gph.equiv[(l_start, curr)]
            dont_enter = False
            for ad in waiting:
                for i in waiting[ad].loop_start:
                    if (i, ad) in eq:
                        dont_enter = True
                        break
                if dont_enter:
                    break
            if dont_enter:
                # Restart main loop
                continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop(-1)
                ast = l_ast.parent
            else:
                break

        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry_addr
            l_set = None

        level = ast.level

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            is_new_loop = True
            if (l_start, curr) not in ctx.gph.loops_all:
                is_new_loop = False
            else:
                # Check if it's not equivalent to the current loop
                if loops_stack:
                    l_ast, l_prev_loop, l_start = loops_stack[-1]
                    if (l_prev_loop, curr) in ctx.gph.equiv and \
                            (l_prev_loop, l_start) in \
                            ctx.gph.equiv[(l_prev_loop, curr)]:
                        is_new_loop = False

            if is_new_loop:
                ctx.labels[curr] = "loop_0x%x" % curr
                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True

        # Here curr may has changed

        if curr in visited:
            # Jump back to the start of the current loop: nothing to add.
            if curr == l_start:
                continue
            if len(ast.nodes) > 0:
                if not isinstance(ast.nodes[-1], list):
                    ast.add(Ast_Goto(curr))
                else:
                    # After a block, a goto is only needed if the block
                    # does not start with an unconditional jump.
                    prev_inst = ast.nodes[-1][0]
                    if not ctx.libarch.utils.is_uncond_jump(prev_inst):
                        ast.add(Ast_Goto(curr))
            else:
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            ctx.labels[curr] = "ret_0x%x" % curr
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if curr in ctx.dis.jmptables:
            # Jump table: every target is visited from the same branch.
            ast.add(blk)
            for n in nxt:
                stack.append((ast, loops_stack, curr, n, else_addr))

        elif len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    # The node inside the loop is pushed last, so it is
                    # popped (visited) first.
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    ast.add(a)
                    continue

            # and-if
            if ctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT], prefetch)
                    a.parent = ast
                    ast.add(a)

                    # Add a fake branch, with this in the manage function
                    # all gotos to the else_addr will be invisible.
                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP],
                                  prefetch)
                    a.parent = ast
                    ast.add(a)

                    fake_br = Ast_Branch()
                    fake_br.level = sys.maxsize
                    stack.append((fake_br, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))

                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                    l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append(
                (ast_if, list(loops_stack), curr, nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint: the walk continues on `ast` after the if.
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint, prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            a.parent = ast
            a.level = level + 1
            ast.add(a)

        else:
            # Single successor: add the block and continue on the next one.
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)
    add_goto_after_alone_andif(ast)
    add_goto_if_inst_not_consecutives(ctx, ast)

    elapsed = time.perf_counter()
    elapsed = elapsed - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast

    start = time.perf_counter()

    for func in ctx.libarch.registered:
        func(ctx, ast)

    elapsed = time.perf_counter()
    elapsed = elapsed - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast
def manage_endpoint(ctx, waiting, ast, prev, ad, l_set, l_prev_loop,
                    l_start, ad_is_visited):
    """Decide whether the walk may continue on node `ad` or must wait.

    Returns the ast on which the walk continues, or None when the caller
    has to stop here and wait for the other incoming links of `ad`.

    - If `ad` is not a key of ``ctx.gph.link_in`` or has at most one
      incoming link, `ast` is returned unchanged.
    - If `ad_is_visited` is False, the call only pre-registers `ad` in
      `waiting` (when absent) and returns None; `prev` is meaningless then.
    - If `ad` is already waiting, the arrival is recorded with
      ``rendezvous``. Once no link is left unseen, the registered ast with
      the smallest level is returned, the other registered asts may get an
      ``Ast_Goto(ad)``, and the entry is removed.
    - Otherwise a new waiting entry is created when more than one incoming
      link is unseen.
    """
    incoming = ctx.gph.link_in
    if ad not in incoming or len(incoming[ad]) <= 1:
        return ast

    # This is only a prevision for a future visit of the node.
    if not ad_is_visited:
        if ad not in waiting:
            links = get_unseen_links_in(ad, l_set, l_prev_loop, l_start)
            waiting[ad] = Endpoint(ast, links, l_start)
        return None

    if ad not in waiting:
        links = get_unseen_links_in(ad, l_set, l_prev_loop, l_start)
        if len(links) > 1:
            links.remove(prev)
            waiting[ad] = Endpoint(ast, links, l_start)
            return None
        return ast

    waiting[ad].rendezvous(ast, prev, l_start)

    if len(waiting[ad].unseen) != 0:
        return None

    # Choose the ast with the smallest level, skipping false loops.
    registered = waiting[ad].ast
    starts = waiting[ad].loop_start
    best = -1

    for idx, candidate in enumerate(registered):
        if (starts[idx], ad) in ctx.gph.false_loops:
            continue
        if best == -1 or candidate.level < registered[best].level:
            best = idx

    if best == -1:
        print("errorD: this is a bug, please report")
        sys.exit(1)

    ast = registered[best]

    # Put a goto to the endpoint at the end of every other ast. Useless
    # ones are expected to be deleted later (see remove_unnecessary_goto).
    for idx, other in enumerate(registered):
        if idx == best:
            continue

        if len(other.nodes) != 0:
            last = other.nodes[-1]

            # The ast already ends with the same goto.
            if isinstance(last, Ast_Goto) and last.addr_jump == ad:
                continue

            # The ast ends with a block finishing with an unconditional
            # jump.
            if isinstance(last, list) and \
                    last[-1].address in ctx.gph.uncond_jumps_set:
                continue

        other.add(Ast_Goto(ad))

    waiting[ad].ast.clear()
    del waiting[ad]

    return ast
def generate_ast(ctx__):
    """Build the ast of the code reachable from `ctx.entry_addr`.

    The graph `ctx.gph` is walked iteratively with an explicit stack. Each
    stack entry is a tuple `(ast, loops_stack, prev, curr, else_addr)`:
    the branch being filled, the stack of enclosing loops (entries are
    `(loop_ast, l_prev_loop, l_start)`), the address we come from, the
    address to process and the address of the current "else" target used
    for and-if detection.

    Once the walk is finished, unnecessary gotos are removed and every
    function of `ctx.libarch.registered` is applied on the result.

    Side effect: the module-level global `ctx` is rebound to `ctx__`.

    Returns:
        The root Ast_Branch.

    Raises:
        ExcIfelse: when both successors of a conditional jump located in a
            loop are outside of this loop.
    """
    global ctx
    ctx = ctx__

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()

    ast = Ast_Branch()
    ast.parent = None
    stack = [(ast, [], -1, ctx.entry_addr, -1)]
    visited = set()
    waiting = {}

    ast_head = ast

    while stack:
        ast, loops_stack, prev, curr, else_addr = stack.pop()

        # Check if we enter in a false loop (see gotoinloop*)
        if loops_stack:
            _, _, l_start = loops_stack[-1]
        else:
            l_start = ctx.entry_addr

        if (l_start, curr) in ctx.gph.false_loops:
            continue

        # Check if we have already an other equivalent loop in waiting.
        if (l_start, curr) in ctx.gph.equiv:
            eq = ctx.gph.equiv[(l_start, curr)]
            dont_enter = False
            for ad in waiting:
                for i in waiting[ad].loop_start:
                    if (i, ad) in eq:
                        dont_enter = True
                        break
                if dont_enter:
                    break
            if dont_enter:
                # Restart main loop
                continue

        blk = ctx.gph.nodes[curr]

        # Exit the current loop: pop every loop which doesn't contain curr
        # and continue on the parent of the popped loop.
        while loops_stack:
            l_ast, l_prev_loop, l_start = loops_stack[-1]
            l_set = ctx.gph.loops_all[(l_prev_loop, l_start)]
            if curr not in l_set:
                loops_stack.pop()
                ast = l_ast.parent
            else:
                break

        if not loops_stack:
            l_prev_loop = -1
            l_start = ctx.entry_addr
            l_set = None

        level = ast.level

        # Add a goto for more readability
        if len(ast.nodes) > 0:
            last_node = ast.nodes[-1]
            if isinstance(last_node, (Ast_AndIf, Ast_Ifelse)):
                if curr != last_node.expected_next_addr:
                    a = Ast_Goto(last_node.expected_next_addr)
                    a.dont_remove = True
                    ast.add(a)

        if curr not in visited:
            # Check if we need to stop and wait on a node
            a = manage_endpoint(ctx, waiting, ast, prev, curr, l_set,
                                l_prev_loop, l_start, True)
            if a is None:
                continue

            ast = a
            remove_unnecessary_goto(ast, curr)

            # Check if we enter in a new loop
            is_new_loop = True
            if (l_start, curr) not in ctx.gph.loops_all:
                is_new_loop = False
            else:
                # Check if it's not equivalent to the current loop
                if loops_stack:
                    l_ast, l_prev_loop, l_start = loops_stack[-1]
                    if (l_prev_loop, curr) in ctx.gph.equiv and \
                            (l_prev_loop, l_start) in \
                            ctx.gph.equiv[(l_prev_loop, curr)]:
                        is_new_loop = False

            if is_new_loop:
                level += 1
                a = Ast_Loop()
                a.level = level
                a.parent = ast
                a.branch.parent = ast
                a.branch.level = level
                a.branch.idx_in_parent = len(ast.nodes)
                ast.add(a)
                ast = a.branch
                loops_stack.append((a, l_start, curr))
                else_addr = -1
                l_ast = a
                l_set = ctx.gph.loops_all[(l_start, curr)]
                l_prev_loop = l_start
                l_start = curr
                if (l_prev_loop, l_start) in ctx.gph.infinite_loop:
                    a.is_infinite = True

        # The block was already emitted somewhere else: at most a goto to
        # it is added here.
        if curr in visited:
            if curr == l_start:
                continue
            if not ast.nodes:
                # Empty branch: there is no previous node to inspect
                # (indexing nodes[-1] would raise IndexError), so jump
                # explicitly to the already emitted block.
                ast.add(Ast_Goto(curr))
                continue
            if not isinstance(ast.nodes[-1], list):
                continue
            prev_inst = ast.nodes[-1][0]
            if not ctx.libarch.utils.is_uncond_jump(prev_inst):
                ast.add(Ast_Goto(curr))
            continue

        visited.add(curr)

        # Return instruction
        if curr not in ctx.gph.link_out:
            ast.add(blk)
            continue

        nxt = ctx.gph.link_out[curr]

        if len(nxt) == 2:
            # We are on a conditional jump

            prefetch = blk[1] if len(blk) == 2 else None

            if loops_stack:
                goto_set = False

                c1 = nxt[BRANCH_NEXT] not in l_set
                c2 = nxt[BRANCH_NEXT_JUMP] not in l_set

                if c1 and c2:
                    raise ExcIfelse(curr)

                if c1:
                    exit_loop = nxt[BRANCH_NEXT]
                    nxt_node_in_loop = nxt[BRANCH_NEXT_JUMP]
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    goto_set = True

                if c2:
                    exit_loop = nxt[BRANCH_NEXT_JUMP]
                    nxt_node_in_loop = nxt[BRANCH_NEXT]
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    goto_set = True

                # goto to exit a loop
                if goto_set:
                    stack.append((ast.parent, list(loops_stack), curr,
                                  exit_loop, else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt_node_in_loop, else_addr))
                    a = Ast_IfGoto(blk[0], cond_id, exit_loop, prefetch)
                    a.parent = ast
                    a.level = level
                    ast.add(a)
                    continue

            # and-if
            if ctx.print_andif:
                if else_addr == nxt[BRANCH_NEXT_JUMP]:
                    cond_id = ctx.libarch.utils.invert_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT],
                                  prefetch)
                    a.parent = ast
                    ast.add(a)
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

                # and-if
                if else_addr == nxt[BRANCH_NEXT]:
                    cond_id = ctx.libarch.utils.get_cond(blk[0])
                    a = Ast_AndIf(blk[0], cond_id, nxt[BRANCH_NEXT_JUMP],
                                  prefetch)
                    a.parent = ast
                    ast.add(a)
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT], else_addr))
                    stack.append((ast, list(loops_stack), curr,
                                  nxt[BRANCH_NEXT_JUMP], else_addr))
                    continue

            # if-else

            endpoint = search_endpoint(ctx, stack, ast, curr,
                                       l_set, l_prev_loop, l_start)

            ast_if = Ast_Branch()
            ast_if.parent = ast
            ast_if.level = level + 1
            ast_if.idx_in_parent = len(ast.nodes)

            ast_else = Ast_Branch()
            ast_else.parent = ast
            ast_else.level = level + 1
            ast_else.idx_in_parent = len(ast.nodes)

            else_addr = nxt[BRANCH_NEXT_JUMP]

            if endpoint != -1:
                if (l_start, endpoint) not in ctx.gph.false_loops:
                    # Only registers the endpoint for a future visit, the
                    # returned value is not used.
                    manage_endpoint(ctx, waiting, ast, -1, endpoint, l_set,
                                    l_prev_loop, l_start, False)
                else:
                    endpoint = -1

            stack.append((ast_if, list(loops_stack), curr,
                          nxt[BRANCH_NEXT], else_addr))

            if endpoint == -1:
                # No endpoint: the jump target is continued on the current
                # ast rather than in ast_else.
                a = Ast_Ifelse(blk[0], ast_else, ast_if, else_addr,
                               prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            elif endpoint == else_addr:
                # Branch ast_else will be empty
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint,
                               prefetch)
                stack.append((ast, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            else:
                a = Ast_Ifelse(blk[0], ast_else, ast_if, endpoint,
                               prefetch)
                stack.append((ast_else, list(loops_stack), curr,
                              nxt[BRANCH_NEXT_JUMP], else_addr))

            a.parent = ast
            a.level = level + 1
            ast.add(a)

        else:
            ast.add(blk)
            stack.append((ast, loops_stack, curr,
                          nxt[BRANCH_NEXT], else_addr))

    ast = ast_head

    remove_all_unnecessary_goto(ast)

    elapsed = time.perf_counter() - start
    debug__("Ast generated in %fs" % elapsed)

    # Process ast
    start = time.perf_counter()

    for func in ctx.libarch.registered:
        func(ctx, ast)

    elapsed = time.perf_counter() - start
    debug__("Functions for processing ast in %fs" % elapsed)

    if ctx.color:
        ctx.libarch.process_ast.assign_colors(ctx, ast)

    return ast