def modules_diff(mod_first, mod_second, glob_var, fun, config):
    """
    Compare two LLVM IR modules semantically with respect to a parameter.

    The set of compared functions is chosen as follows: if `fun` is given,
    only that function is compared; otherwise, if `glob_var` is given, the
    functions using that parameter in each module are compared; otherwise
    nothing is compared.

    :param mod_first: First LLVM module
    :param mod_second: Second LLVM module
    :param glob_var: Parameter (global variable) to compare (if specified,
                     all functions using this variable are compared).
    :param fun: Function to be compared.
    :param config: Configuration.
    :returns: Result whose inner results are the per-function comparisons.
              Its kind is set to ERROR (and the comparison stops) as soon as
              a selected function is missing from one of the modules.
    """
    result = Result(Result.Kind.NONE, mod_first, mod_second)

    # Select the candidate functions on each side.
    if fun:
        candidates_first, candidates_second = {fun}, {fun}
    elif glob_var:
        candidates_first = mod_first.get_functions_using_param(glob_var)
        candidates_second = mod_second.get_functions_using_param(glob_var)
    else:
        candidates_first, candidates_second = [], []

    for name in candidates_first:
        # Only functions selected in both modules are compared.
        if name not in candidates_second:
            continue

        present_in_both = (mod_first.has_function(name)
                           and mod_second.has_function(name))
        if not present_in_both:
            print(" Given function not found in module")
            result.kind = Result.Kind.ERROR
            return result

        result.add_inner(functions_diff(mod_first=mod_first,
                                        mod_second=mod_second,
                                        fun_first=name,
                                        fun_second=name,
                                        glob_var=glob_var,
                                        config=config))

    return result
def compare(args):
    """
    Compare snapshots of linux kernels.

    Runs the semantic comparison and shows information about the compared
    functions that are semantically different.

    :param args: Parsed command-line arguments. The attributes read here are
                 snapshot_dir_old, snapshot_dir_new, stdout, output_dir,
                 function, show_diff, output_llvm_ir, control_flow_only,
                 print_asm_diffs, verbose, enable_simpll_ffi, semdiff_tool,
                 enable_module_cache, regex_filter, report_stat and
                 show_errors.
    :returns: 0. Exits the process with errno.EEXIST if an explicitly
              requested output directory already exists.
    """
    # Parse both the new and the old snapshot.
    old_snapshot = Snapshot.load_from_dir(args.snapshot_dir_old)
    new_snapshot = Snapshot.load_from_dir(args.snapshot_dir_new)

    # Set the output directory
    if not args.stdout:
        if args.output_dir:
            output_dir = args.output_dir
            if os.path.isdir(output_dir):
                sys.stderr.write("Error: output directory exists\n")
                sys.exit(errno.EEXIST)
        else:
            output_dir = default_output_dir(args.snapshot_dir_old,
                                            args.snapshot_dir_new)
    else:
        output_dir = None

    if args.function:
        old_snapshot.filter([args.function])
        new_snapshot.filter([args.function])

    config = Config(old_snapshot, new_snapshot, args.show_diff,
                    args.output_llvm_ir, args.control_flow_only,
                    args.print_asm_diffs, args.verbose,
                    args.enable_simpll_ffi, args.semdiff_tool)
    # The overall result is labelled with the old and the new snapshot.
    result = Result(Result.Kind.NONE, args.snapshot_dir_old,
                    args.snapshot_dir_new, start_time=default_timer())

    # The filter does not change during the run, so compile it only once.
    pattern = (re.compile(args.regex_filter)
               if args.regex_filter is not None else None)

    for group_name, group in sorted(old_snapshot.fun_groups.items()):
        group_printed = False

        # Set the group directory
        if output_dir is not None and group_name is not None:
            group_dir = os.path.join(output_dir, group_name)
        else:
            group_dir = None

        # The result graph and the SimpLL cache are shared by all functions
        # of one group.
        result_graph = None
        cache = SimpLLCache(mkdtemp())

        if args.enable_module_cache:
            module_cache = _generate_module_cache(group.functions.items(),
                                                  group_name,
                                                  new_snapshot, 3)
        else:
            module_cache = None

        for fun, old_fun_desc in sorted(group.functions.items()):
            # Check if the function exists in the other snapshot
            new_fun_desc = new_snapshot.get_by_name(fun, group_name)
            if not new_fun_desc:
                continue

            # Check if the module exists in both snapshots
            if old_fun_desc.mod is None or new_fun_desc.mod is None:
                result.add_inner(Result(Result.Kind.UNKNOWN, fun, fun))
                if group_name is not None and not group_printed:
                    print("{}:".format(group_name))
                    group_printed = True
                print("{}: unknown".format(fun))
                continue

            # If function has a global variable, set it
            glob_var = KernelParam(old_fun_desc.glob_var) \
                if old_fun_desc.glob_var else None

            # Run the semantic diff
            fun_result = functions_diff(mod_first=old_fun_desc.mod,
                                        mod_second=new_fun_desc.mod,
                                        fun_first=fun,
                                        fun_second=fun,
                                        glob_var=glob_var,
                                        config=config,
                                        prev_result_graph=result_graph,
                                        function_cache=cache,
                                        module_cache=module_cache)

            if fun_result is not None:
                # Only dereference the result once it is known to exist.
                result_graph = fun_result.graph

                if pattern is not None:
                    # Filter results by regex: when no inner diff matches
                    # the pattern, the function is reported as equal.
                    for called_res in fun_result.inner.values():
                        if pattern.search(called_res.diff):
                            break
                    else:
                        fun_result.kind = Result.Kind.EQUAL_SYNTAX

                result.add_inner(fun_result)

                # Printing information about failures and non-equal
                # functions.
                if fun_result.kind in [Result.Kind.NOT_EQUAL,
                                       Result.Kind.ERROR,
                                       Result.Kind.UNKNOWN]:
                    if fun_result.kind == Result.Kind.NOT_EQUAL:
                        # Create the output directory if needed
                        if output_dir is not None:
                            if not os.path.isdir(output_dir):
                                os.mkdir(output_dir)
                        # Create the group directory or print the group
                        # name if needed
                        if group_dir is not None:
                            if not os.path.isdir(group_dir):
                                os.mkdir(group_dir)
                        elif group_name is not None and not group_printed:
                            print("{}:".format(group_name))
                            group_printed = True
                        print_syntax_diff(
                            snapshot_dir_old=args.snapshot_dir_old,
                            snapshot_dir_new=args.snapshot_dir_new,
                            fun=fun,
                            fun_result=fun_result,
                            fun_tag=old_fun_desc.tag,
                            output_dir=group_dir if group_dir else output_dir,
                            show_diff=args.show_diff,
                            initial_indent=2 if (group_name is not None and
                                                 group_dir is None) else 0)
                    else:
                        # Print the group name if needed
                        if group_name is not None and not group_printed:
                            print("{}:".format(group_name))
                            group_printed = True
                        print("{}: {}".format(fun, str(fun_result.kind)))

            # Clean LLVM modules (allow GC to collect the occupied memory)
            old_fun_desc.mod.clean_module()
            new_fun_desc.mod.clean_module()
            LlvmKernelModule.clean_all()

    old_snapshot.finalize()
    new_snapshot.finalize()

    if output_dir is not None and os.path.isdir(output_dir):
        print("Differences stored in {}/".format(output_dir))

    if args.report_stat:
        print("")
        print("Statistics")
        print("----------")
        result.stop_time = default_timer()
        result.report_stat(args.show_errors)
    return 0
def _run_llreve_z3(first, second, funFirst, funSecond, coupled, timeout,
                   verbose):
    """
    Run the comparison of semantics of two functions using the llreve tool
    and the Z3 SMT solver.

    The llreve tool takes compared functions in LLVM IR and generates
    a formula in first-order predicate logic. The formula is then solved
    using the Z3 solver. If it is unsatisfiable, the compared functions are
    semantically the same, otherwise, they are different.

    The generated formula is in the theory of bitvectors.

    :param first: File with the first LLVM module
    :param second: File with the second LLVM module
    :param funFirst: Function from the first module to be compared
    :param funSecond: Function from the second module to be compared
    :param coupled: List of coupled functions (functions that are supposed
                    to correspond to each other in both modules). These are
                    needed for functions not having definitions.
    :param timeout: Timeout for the analysis in seconds
    :param verbose: Verbosity option
    :returns: Result for (first, second) whose kind is NOT_EQUAL for "sat",
              EQUAL for "unsat", UNKNOWN for "unknown", ERROR when Z3 prints
              none of these or exits with a non-zero code, and TIMEOUT when
              the timer is no longer running once the tools have finished.
    """
    # Imported locally so the block does not depend on which subprocess
    # names the file imports at the top.
    from subprocess import DEVNULL

    # Silence the tools unless verbose; DEVNULL needs no explicit close
    # (the previously opened '/dev/null' handle was never closed).
    stderr = None if verbose else DEVNULL

    # Commands for running llreve and Z3 (output of llreve is piped into Z3)
    command = ["build/llreve/reve/reve/llreve",
               first, second,
               "--fun=" + funFirst + "," + funSecond,
               "-muz", "--ir-input", "--bitvect", "--infer-marks",
               "--disable-auto-coupling"]
    for c in coupled:
        command.append("--couple-functions={},{}".format(c[0], c[1]))

    if verbose:
        sys.stderr.write(" ".join(command) + "\n")

    llreve_process = Popen(command, stdout=PIPE, stderr=stderr)
    z3_process = Popen(["z3", "fixedpoint.engine=duality", "-in"],
                       stdin=llreve_process.stdout,
                       stdout=PIPE, stderr=stderr)
    # Z3 now owns the read end of the pipe; drop the parent's copy so that
    # llreve is notified (SIGPIPE) if Z3 exits before reading everything.
    llreve_process.stdout.close()

    # Set timeout for both tools
    # NOTE(review): _kill is defined elsewhere; presumably it terminates
    # both processes - confirm.
    timer = Timer(timeout, _kill, [[llreve_process, z3_process]])
    try:
        timer.start()

        # communicate() drains stdout while waiting, so Z3 cannot block on
        # a full pipe the way it could with a bare wait().
        z3_output, _ = z3_process.communicate()
        # Reap llreve as well so that it does not stay around as a zombie.
        llreve_process.wait()

        result_kind = Result.Kind.ERROR
        # Processing the output
        for line in z3_output.splitlines():
            line = line.strip()
            if line == b"sat":
                result_kind = Result.Kind.NOT_EQUAL
            elif line == b"unsat":
                result_kind = Result.Kind.EQUAL
            elif line == b"unknown":
                result_kind = Result.Kind.UNKNOWN

        if z3_process.returncode != 0:
            result_kind = Result.Kind.ERROR
    finally:
        # A timer that is no longer alive here is taken to have fired.
        if not timer.is_alive():
            result_kind = Result.Kind.TIMEOUT
        timer.cancel()

    return Result(result_kind, first, second)
def functions_diff(mod_first, mod_second,
                   fun_first, fun_second,
                   glob_var, config,
                   prev_result_graph=None, function_cache=None):
    """
    Compare two functions for equality.

    First, functions are simplified and compared for syntactic equality using
    the SimpLL tool. If they are not syntactically equal, SimpLL prints a list
    of functions that the syntactic equality depends on. These are then
    compared for semantic equality.

    :param mod_first: First LLVM module
    :param mod_second: Second LLVM module
    :param fun_first: Function from the first module to be compared
    :param fun_second: Function from the second module to be compared
    :param glob_var: Global variable whose effect on the functions to compare
    :param config: Configuration
    :param prev_result_graph: Graph generated by the previous comparison
                              (used to pass already known results to be used
                              in this comparison).
    :param function_cache: Cache for SimpLL containing all functions present
                           in the current graph (passed to this function to
                           be updated with the results of the comparison).
    :returns: Result for the function pair. Its kind is EQUAL_SYNTAX when
              nothing is left to compare and ERROR when a ValueError or
              a SimpLLException is raised; otherwise one inner result is
              added per differing object. The graph attribute holds the
              graph of this comparison, or prev_result_graph when none was
              produced.
    """
    result = Result(Result.Kind.NONE, fun_first, fun_second)
    curr_result_graph = None
    try:
        if config.verbosity:
            if fun_first == fun_second:
                fun_str = fun_first
            else:
                fun_str = "{} and {}".format(fun_first, fun_second)
            print("Syntactic diff of {} (in {})".format(fun_str,
                                                        mod_first.llvm))

        # The simplification is repeated for as long as linking a missing
        # definition changes one of the modules.
        simplify = True
        while simplify:
            simplify = False
            if (prev_result_graph and
                    fun_first in prev_result_graph.vertices and
                    (prev_result_graph.vertices[fun_first].result !=
                     Result.Kind.ASSUMED_EQUAL)):
                # The previous graph already holds a result for this
                # function; reuse it and skip running SimpLL.
                first_simpl = ""
                second_simpl = ""
                curr_result_graph = prev_result_graph
            else:
                # Simplify modules and get the output graph.
                first_simpl, second_simpl, curr_result_graph, missing_defs = \
                    run_simpll(first=mod_first.llvm, second=mod_second.llvm,
                               fun_first=fun_first, fun_second=fun_second,
                               var=glob_var.name if glob_var else None,
                               suffix=glob_var.name if glob_var else "simpl",
                               cache_dir=function_cache.directory
                               if function_cache else None,
                               control_flow_only=config.control_flow_only,
                               output_llvm_ir=config.output_llvm_ir,
                               print_asm_diffs=config.print_asm_diffs,
                               verbose=config.verbosity,
                               use_ffi=config.use_ffi)
                if missing_defs:
                    # If there are missing function definitions, try to find
                    # their implementation, link them to the current modules,
                    # and rerun the simplification.
                    for fun_pair in missing_defs:
                        if "first" in fun_pair:
                            if _link_symbol_def(config.snapshot_first,
                                                mod_first,
                                                fun_pair["first"]):
                                simplify = True

                        if "second" in fun_pair:
                            if _link_symbol_def(config.snapshot_second,
                                                mod_second,
                                                fun_pair["second"]):
                                simplify = True

                if prev_result_graph and not simplify:
                    # Note: "curr_result_graph" is here the partial result
                    # graph, i.e. can contain unknown results that are known
                    # in the graph from the previous comparison.
                    prev_result_graph.absorb_graph(curr_result_graph)
                    curr_result_graph = prev_result_graph

                # Add the newly received results to the ignored functions
                # file.
                # Note: there won't be any duplicates, since all functions
                # that were in the cache before will be marked as unknown.
                if function_cache:
                    function_cache.update([
                        v for v in curr_result_graph.vertices.values()
                        if v.result not in [Result.Kind.UNKNOWN,
                                            Result.Kind.ASSUMED_EQUAL]
                    ])

        objects_to_compare, syndiff_bodies_left, syndiff_bodies_right = \
            curr_result_graph.graph_to_fun_pair_list(fun_first, fun_second)

        mod_first.restore_unlinked_llvm()
        mod_second.restore_unlinked_llvm()

        if not objects_to_compare:
            result.kind = Result.Kind.EQUAL_SYNTAX
        else:
            # If the functions are not syntactically equal, objects_to_compare
            # contains a list of functions and macros that are different.
            for fun_pair in objects_to_compare:
                # The semantic diff tool works on function names, so it is
                # used for function differences only. (The condition used to
                # be negated, which sent only type and syntactic differences
                # to the tool and never the functions themselves.)
                if (fun_pair[0].diff_kind == "function" and
                        config.semdiff_tool is not None):
                    # If a semantic diff tool is set, use it for further
                    # comparison of non-equal functions
                    fun_result = functions_semdiff(first_simpl, second_simpl,
                                                   fun_pair[0].name,
                                                   fun_pair[1].name,
                                                   config)
                else:
                    fun_result = Result(fun_pair[2], fun_first, fun_second)
                fun_result.first = fun_pair[0]
                fun_result.second = fun_pair[1]
                if fun_result.kind == Result.Kind.NOT_EQUAL:
                    if fun_result.first.diff_kind in ["function", "type"]:
                        # Get the syntactic diff of functions or types
                        fun_result.diff = syntax_diff(
                            fun_result.first.filename,
                            fun_result.second.filename,
                            fun_result.first.name,
                            fun_result.first.diff_kind,
                            fun_pair[0].line,
                            fun_pair[1].line)
                    elif fun_result.first.diff_kind == "syntactic":
                        # Find the syntax differences and append the left and
                        # right value to create the resulting diff
                        fun_result.diff = " {}\n\n {}\n".format(
                            syndiff_bodies_left[fun_result.first.name],
                            syndiff_bodies_right[fun_result.second.name])
                    else:
                        sys.stderr.write(
                            "warning: unknown diff kind: {}\n".format(
                                fun_result.first.diff_kind))
                        fun_result.diff = "unknown\n"
                result.add_inner(fun_result)
        if config.verbosity:
            print(" {}".format(result))
    except ValueError:
        result.kind = Result.Kind.ERROR
    except SimpLLException as e:
        if config.verbosity:
            print(e)
        result.kind = Result.Kind.ERROR

    # Hand the graph on so that the next comparison can reuse its results.
    result.graph = (curr_result_graph if curr_result_graph
                    else prev_result_graph)
    return result
def diff_all_modules_using_global(glob_first, glob_second, config):
    """
    Compare semantics of all modules using the given global variable.

    Looks up the source files that use the globals in both source trees and
    compares the modules built from every source found in the first tree.

    :param glob_first: First global to compare
    :param glob_second: Second global to compare
    :param config: Configuration
    :returns: Result with one inner result per compared function, plus
              a NOT_EQUAL inner result for each source missing from the
              second tree and an ERROR inner result for each source that
              raised a BuildException.
    """
    result = Result(Result.Kind.NONE, glob_first.name, glob_second.name)

    # Variables with different names are treated as unequal
    if glob_first.name != glob_second.name:
        result.kind = Result.Kind.NOT_EQUAL
        return result

    srcs_first = config.source_first.find_srcs_using_symbol(glob_first.name)
    srcs_second = config.source_second.find_srcs_using_symbol(
        glob_second.name)

    # Compare all sources containing functions using the variable
    for src in srcs_first:
        if src not in srcs_second:
            # The source uses the variable on the first side only.
            result.add_inner(Result(Result.Kind.NOT_EQUAL, src, src))
            continue

        try:
            mod_first = config.source_first.get_module_from_source(src)
            mod_second = config.source_second.get_module_from_source(src)
            mod_first.parse_module()
            mod_second.parse_module()

            both_have_global = (mod_first.has_global(glob_first.name)
                                and mod_second.has_global(glob_second.name))
            if both_have_global:
                src_result = modules_diff(mod_first=mod_first,
                                          mod_second=mod_second,
                                          glob_var=glob_first,
                                          fun=None,
                                          config=config)
                # Flatten the per-function results into the overall result.
                for inner in src_result.inner.values():
                    result.add_inner(inner)
        except BuildException as e:
            if config.verbosity:
                print(e)
            result.add_inner(Result(Result.Kind.ERROR, src, src))

    return result
def compare(args):
    """
    Compare the functions listed in two kernel snapshots.

    Loads the function list ("functions.yaml") of both snapshot directories,
    runs the diff on every function of the old list that both modules
    contain and prints the functions that are not equal or whose comparison
    failed.

    :param args: Parsed command-line arguments. The attributes read here are
                 snapshot_dir_old, snapshot_dir_new, function, show_diff,
                 control_flow_only, verbose, semdiff_tool, regex_filter,
                 report_stat and show_errors.
    :returns: 0
    """
    old_functions = FunctionList(args.snapshot_dir_old)
    with open(os.path.join(args.snapshot_dir_old, "functions.yaml"),
              "r") as fun_list_yaml:
        old_functions.from_yaml(fun_list_yaml.read())
    old_source = KernelSource(args.snapshot_dir_old)

    new_functions = FunctionList(args.snapshot_dir_new)
    with open(os.path.join(args.snapshot_dir_new, "functions.yaml"),
              "r") as fun_list_yaml:
        new_functions.from_yaml(fun_list_yaml.read())
    new_source = KernelSource(args.snapshot_dir_new)

    if args.function:
        old_functions.filter([args.function])
        new_functions.filter([args.function])

    config = Config(old_source, new_source, args.show_diff,
                    args.control_flow_only, args.verbose,
                    args.semdiff_tool)
    # The overall result is labelled with the old and the new snapshot.
    result = Result(Result.Kind.NONE, args.snapshot_dir_old,
                    args.snapshot_dir_new)

    # The filter does not change during the run, so compile it only once.
    pattern = (re.compile(args.regex_filter)
               if args.regex_filter is not None else None)

    for fun, old_mod in sorted(old_functions.functions.items()):
        new_mod = new_functions.get_by_name(fun)
        # NOTE(review): presumably get_by_name returns None for a function
        # that is missing from the new list - confirm. Such a function is
        # skipped instead of failing on the attribute access below.
        if new_mod is None:
            continue
        if not (old_mod.has_function(fun) and new_mod.has_function(fun)):
            continue

        fun_result = functions_diff(
            mod_first=old_mod, mod_second=new_mod,
            fun_first=fun, fun_second=fun,
            glob_var=None, config=config)

        if fun_result is not None:
            if pattern is not None:
                # Filter results by regex: when no inner diff matches the
                # pattern, the function is reported as equal.
                for called_res in fun_result.inner.values():
                    if pattern.search(called_res.diff):
                        break
                else:
                    fun_result.kind = Result.Kind.EQUAL_SYNTAX

            result.add_inner(fun_result)
            if fun_result.kind in [Result.Kind.ERROR, Result.Kind.UNKNOWN]:
                print("{}: {}".format(fun, str(fun_result.kind)))
            elif fun_result.kind == Result.Kind.NOT_EQUAL:
                print_syntax_diff(args.snapshot_dir_old,
                                  args.snapshot_dir_new,
                                  fun, fun_result, False,
                                  args.show_diff)

        # Clean LLVM modules (allow GC to collect the occupied memory)
        old_mod.clean_module()
        new_mod.clean_module()
        LlvmKernelModule.clean_all()

    if args.report_stat:
        print("")
        print("Statistics")
        print("----------")
        result.report_stat(args.show_errors)
    return 0
def graph_to_fun_pair_list(self, fun_first, fun_second):
    """
    Convert the graph into a list of differing objects reachable from the
    compared functions.

    :param fun_first: Base function on the left side
    :param fun_second: Base function on the right side
    :returns: Tuple (objects_to_compare, syndiff_bodies_left,
              syndiff_bodies_right). Each function entry of
              objects_to_compare is a tuple (left entity, right entity,
              result kind); entries for non-function differences are built
              the same way with the kind NOT_EQUAL. The two dictionaries map
              the name of a syntactic difference to its body on the
              respective side.
    """
    def for_side(side, left_value, right_value):
        # Select the value belonging to the given side of the comparison.
        if side == ComparisonGraph.Side.LEFT:
            return left_value
        return right_value

    # Extract the functions that should be compared from the graph in
    # the form of Vertex objects.
    called_funs_left, backtracking_map_left = self.reachable_from(
        ComparisonGraph.Side.LEFT, fun_first)
    called_funs_right, backtracking_map_right = self.reachable_from(
        ComparisonGraph.Side.RIGHT, fun_second)

    # Use the intersection of called functions in the left and right
    # module (i.e. differences in functions called in one module only
    # are not processed).
    reachable_left = set(called_funs_left)
    reachable_right = set(called_funs_right)
    vertices_to_compare = list(reachable_left.intersection(reachable_right))

    # Use methods from ComparisonGraph (on the graph variable) and
    # vertices_to_compare to generate objects_to_compare.
    objects_to_compare = []
    syndiff_bodies_left = dict()
    syndiff_bodies_right = dict()
    equal_kinds = [Result.Kind.EQUAL, Result.Kind.ASSUMED_EQUAL]

    for vertex in vertices_to_compare:
        # Do not include equal functions into the result.
        if vertex.result in equal_kinds:
            continue

        # Generate and add the function difference.
        fun_entry = []
        for side in ComparisonGraph.Side:
            base_fun = for_side(side, fun_first, fun_second)
            backtracking_map = for_side(side, backtracking_map_left,
                                        backtracking_map_right)
            if base_fun == vertex.names[side]:
                # There is no callstack from the base function.
                calls = None
            else:
                # Transform the Edge objects returned by
                # get_shortest_path to a readable callstack.
                edges = _get_callstack(backtracking_map, self[base_fun],
                                       vertex)
                calls = self._edge_callstack_to_string(edges)
            # Note: a function diff is covered (i.e. hidden when empty)
            # if and only if there is a non-function difference
            # referencing it.
            entity = Result.Entity(
                vertex.names[side],
                vertex.files[side],
                vertex.lines[side],
                calls,
                "function",
                covered=len(vertex.nonfun_diffs) != 0
            )
            fun_entry.append(entity)
        fun_entry.append(vertex.result)
        objects_to_compare.append(tuple(fun_entry))

        # Process non-function differences.
        for nonfun_diff in vertex.nonfun_diffs:
            nonfun_entry = []
            for side in ComparisonGraph.Side:
                syndiff_bodies = for_side(side, syndiff_bodies_left,
                                          syndiff_bodies_right)
                backtracking_map = for_side(side, backtracking_map_left,
                                            backtracking_map_right)
                # Convert the YAML callstack format to string.
                calls = self._yaml_callstack_to_string(
                    nonfun_diff.callstack[side])
                # Append the parent function's callstack.
                # (unless it is the base function)
                base_fun = for_side(side, fun_first, fun_second)
                if nonfun_diff.parent_fun != base_fun:
                    parent_edges = _get_callstack(backtracking_map,
                                                  self[base_fun], vertex)
                    parent_calls = self._edge_callstack_to_string(
                        parent_edges)
                    calls = parent_calls + "\n" + calls

                if isinstance(nonfun_diff, ComparisonGraph.SyntaxDiff):
                    nonfun_entry.append(Result.Entity(
                        nonfun_diff.name, None, None, calls,
                        "syntactic", False
                    ))
                    syndiff_bodies[nonfun_diff.name] = \
                        nonfun_diff.body[side]
                elif isinstance(nonfun_diff, ComparisonGraph.TypeDiff):
                    nonfun_entry.append(Result.Entity(
                        nonfun_diff.name,
                        nonfun_diff.file[side],
                        nonfun_diff.line[side],
                        calls, "type", False
                    ))
            # Non-function differences are always of the non-equal type
            nonfun_entry.append(Result.Kind.NOT_EQUAL)
            objects_to_compare.append(tuple(nonfun_entry))

    return objects_to_compare, syndiff_bodies_left, syndiff_bodies_right
def simplify_modules_diff(first, second, fun_first, fun_second, var,
                          suffix=None, control_flow_only=False,
                          verbose=False):
    """
    Simplify modules to ease their semantic difference. Uses the SimpLL tool.

    Runs SimpLL on both modules, post-processes the output files with
    "opt -deadargelim" and parses the YAML report that SimpLL prints.

    :param first: File with the first LLVM module
    :param second: File with the second LLVM module
    :param fun_first: Analysed function from the first module
    :param fun_second: Analysed function from the second module
    :param var: Analysed variable (passed as --var when set)
    :param suffix: Suffix for the output files (passed as --suffix when
                   set); without it the input file names are reused.
    :param control_flow_only: Pass --control-flow to SimpLL
    :param verbose: Pass --verbose to SimpLL, print the command and keep
                    the error output of opt visible.
    :returns: Tuple (first_out, second_out, objects_to_compare,
              missing_defs, syndiff_defs) where the first two items are
              LlvmKernelModule objects for the output files,
              objects_to_compare is a list of Result.Entity pairs and the
              last two items are the "missing-defs" and "syndiff-defs"
              entries of the report (None when absent).
    :raises SimpLLException: if SimpLL or opt exits with a non-zero code.
    """
    # Imported locally so the block does not depend on which subprocess
    # names the file imports at the top.
    from subprocess import DEVNULL

    # Hide the error output of opt unless verbose; DEVNULL needs no
    # explicit close (the previously opened os.devnull handle was never
    # closed).
    stderr = None if verbose else DEVNULL

    first_out_name = add_suffix(first, suffix) if suffix else first
    second_out_name = add_suffix(second, suffix) if suffix else second

    try:
        simpll_command = ["build/diffkemp/simpll/simpll", first, second,
                          "--print-callstacks"]
        # Main (analysed) functions
        simpll_command.append("--fun")
        if fun_first != fun_second:
            simpll_command.append("{},{}".format(fun_first, fun_second))
        else:
            simpll_command.append(fun_first)
        # Analysed variable
        if var:
            simpll_command.extend(["--var", var])
        # Suffix for output files
        if suffix:
            simpll_command.extend(["--suffix", suffix])

        if control_flow_only:
            simpll_command.append("--control-flow")

        if verbose:
            simpll_command.append("--verbose")
            print(" ".join(simpll_command))

        simpll_out = check_output(simpll_command)
        # Each output file is rewritten in place by opt.
        for out_name in (first_out_name, second_out_name):
            check_call(["opt", "-S", "-deadargelim", "-o", out_name,
                        out_name],
                       stderr=stderr)

        first_out = LlvmKernelModule(first_out_name)
        second_out = LlvmKernelModule(second_out_name)

        objects_to_compare = []
        missing_defs = None
        syndiff_defs = None
        try:
            simpll_result = yaml.safe_load(simpll_out)
            if simpll_result is not None:
                for fun_pair_yaml in simpll_result.get("diff-functions", []):
                    fun_pair = [
                        Result.Entity(
                            fun["function"],
                            fun.get("file", ""),
                            fun.get("line"),
                            "\n".join(
                                "{} at {}:{}".format(call["function"],
                                                     call["file"],
                                                     call["line"])
                                for call in fun["callstack"]
                            ) if "callstack" in fun else "",
                            fun["is-syn-diff"],
                            fun["covered-by-syn-diff"]
                        )
                        for fun in [fun_pair_yaml["first"],
                                    fun_pair_yaml["second"]]
                    ]
                    objects_to_compare.append(tuple(fun_pair))
                missing_defs = simpll_result.get("missing-defs")
                syndiff_defs = simpll_result.get("syndiff-defs")
        except yaml.YAMLError:
            # Kept as best-effort: an unparsable report is treated like an
            # empty one.
            # NOTE(review): the caller then sees no differing objects -
            # confirm that this is the intended outcome.
            pass

        return first_out, second_out, objects_to_compare, missing_defs, \
            syndiff_defs
    except CalledProcessError as err:
        # Keep the failed command attached as the cause of the error.
        raise SimpLLException("Simplifying files failed") from err