def test_syntax_diff(task_spec):
    """
    Check that the reported syntax diffs match the expected ones.

    Every function of the task is compared and its verdict is checked first.
    Then, for each inner result whose symbol has an entry in
    task_spec.syntax_diffs, the reported diff must equal the old symbol
    definition followed by the new one (all whitespace is ignored). This
    concatenation is the form that DiffKemp uses for displaying diffs of
    macros and inline assemblies. Differences of functions are displayed
    using the diff utility, so this test cannot currently verify them.
    """
    whitespace = r"\s+"
    for spec in task_spec.functions.values():
        outcome = functions_diff(mod_first=spec.old_module,
                                 mod_second=spec.new_module,
                                 fun_first=spec.name,
                                 fun_second=spec.name,
                                 glob_var=None,
                                 config=task_spec.config)
        assert outcome.kind == spec.result

        for symbol, inner in outcome.inner.items():
            # Only symbols with an expected diff are checked.
            if symbol not in task_spec.syntax_diffs:
                continue
            expected_spec = task_spec.syntax_diffs[symbol]
            # Drop all whitespace on both sides before comparing.
            got = re.sub(whitespace, "", inner.diff)
            wanted = re.sub(whitespace, "",
                            expected_spec.def_old + expected_spec.def_new)
            assert got == wanted
def test_functions_diff(self, task_spec):
    """
    Check the semantic-difference verdict of each individual function.

    Functions whose expected result is TIMEOUT or NONE are skipped; for all
    others the kind returned by functions_diff must equal the expected one.
    """
    skipped_kinds = [Result.Kind.TIMEOUT, Result.Kind.NONE]
    for spec in task_spec.functions.values():
        if spec.result in skipped_kinds:
            continue
        # Both sides are compared under the same function name, with respect
        # to the parameter provided by the task specification.
        diff_kwargs = {
            "mod_first": task_spec.old_module,
            "mod_second": task_spec.new_module,
            "fun_first": spec.name,
            "fun_second": spec.name,
            "glob_var": task_spec.get_param(),
            "config": task_spec.config,
        }
        outcome = functions_diff(**diff_kwargs)
        assert outcome.kind == spec.result
def test_function_diff(task_spec):
    """
    Check the semantic-difference verdict of every function in the task.

    Functions expected to time out are not compared. Each remaining function
    is compared between its old and new module (no global variable is used)
    and the resulting kind must equal the expected one.
    """
    # Lazy filter: each spec is checked right before it is compared.
    comparable = (spec for spec in task_spec.functions.values()
                  if spec.result != Result.Kind.TIMEOUT)
    for spec in comparable:
        outcome = functions_diff(mod_first=spec.old_module,
                                 mod_second=spec.new_module,
                                 fun_first=spec.name,
                                 fun_second=spec.name,
                                 glob_var=None,
                                 config=task_spec.config)
        assert outcome.kind == spec.result
def test_proc_handler(self, task_spec):
    """
    Check the semantic-difference verdict of the proc_handler function.

    Nothing is compared when the expected result is TIMEOUT or NONE.
    """
    handler_spec = task_spec.get_proc_handler_spec()
    if handler_spec.result in [Result.Kind.TIMEOUT, Result.Kind.NONE]:
        return

    outcome = functions_diff(mod_first=handler_spec.old_module,
                             mod_second=handler_spec.new_module,
                             fun_first=handler_spec.name,
                             fun_second=handler_spec.name,
                             glob_var=None,
                             config=task_spec.config)
    assert outcome.kind == handler_spec.result
def test_data_functions(self, task_spec):
    """
    Check the semantic-difference verdict of the functions that use the data
    variable associated with the sysctl parameter.

    The proc handler itself and functions expected to time out are skipped.
    """
    # Data variable of the sysctl parameter, looked up in the old sysctl
    # module; it is passed as the global variable of every comparison.
    data_var = task_spec.old_sysctl_module.get_data(task_spec.name)

    for fun_name, spec in task_spec.functions.items():
        is_proc_handler = fun_name == task_spec.proc_handler
        if not is_proc_handler and spec.result != Result.Kind.TIMEOUT:
            outcome = functions_diff(mod_first=spec.old_module,
                                     mod_second=spec.new_module,
                                     fun_first=spec.name,
                                     fun_second=spec.name,
                                     glob_var=data_var,
                                     config=task_spec.config)
            assert outcome.kind == spec.result
def modules_diff(mod_first, mod_second, glob_var, fun, config):
    """
    Analyse semantic difference of two LLVM IR modules w.r.t. some parameter.

    :param mod_first: First LLVM module
    :param mod_second: Second LLVM module
    :param glob_var: Parameter (global variable) to compare (if specified and
                     no function is given, all functions using this variable
                     are compared).
    :param fun: Function to be compared. Takes precedence over glob_var when
                selecting what to compare.
    :param config: Configuration.
    :returns: Result collecting the results of the compared functions as its
              inner results. Its kind is set to ERROR (and the comparison
              stops) when a selected function is missing in either module.
    """
    result = Result(Result.Kind.NONE, mod_first, mod_second)

    # Select the functions to be compared in each module.
    if fun:
        funs_first, funs_second = {fun}, {fun}
    elif glob_var:
        funs_first = mod_first.get_functions_using_param(glob_var)
        funs_second = mod_second.get_functions_using_param(glob_var)
    else:
        funs_first, funs_second = [], []

    for candidate in funs_first:
        # Only functions selected on both sides are compared.
        if candidate not in funs_second:
            continue

        in_both_modules = (mod_first.has_function(candidate)
                           and mod_second.has_function(candidate))
        if not in_both_modules:
            print(" Given function not found in module")
            result.kind = Result.Kind.ERROR
            return result

        result.add_inner(functions_diff(mod_first=mod_first,
                                        mod_second=mod_second,
                                        fun_first=candidate,
                                        fun_second=candidate,
                                        glob_var=glob_var,
                                        config=config))
    return result
def compare(args):
    """
    Compare snapshots of linux kernels. Runs the semantic comparison and shows
    information about the compared functions that are semantically different.

    :param args: Parsed command-line arguments. Attributes read here:
                 snapshot_dir_old, snapshot_dir_new, stdout, output_dir,
                 function, show_diff, output_llvm_ir, control_flow_only,
                 print_asm_diffs, verbose, enable_simpll_ffi, semdiff_tool,
                 enable_module_cache, regex_filter, report_stat, show_errors.
    :returns: 0 once the comparison has finished. The process exits with
              errno.EEXIST instead when the requested output directory
              already exists.
    """
    # Parse both the new and the old snapshot.
    old_snapshot = Snapshot.load_from_dir(args.snapshot_dir_old)
    new_snapshot = Snapshot.load_from_dir(args.snapshot_dir_new)

    # Set the output directory (None means that diffs go to stdout).
    if not args.stdout:
        if args.output_dir:
            output_dir = args.output_dir
            if os.path.isdir(output_dir):
                sys.stderr.write("Error: output directory exists\n")
                sys.exit(errno.EEXIST)
        else:
            output_dir = default_output_dir(args.snapshot_dir_old,
                                            args.snapshot_dir_new)
    else:
        output_dir = None

    if args.function:
        old_snapshot.filter([args.function])
        new_snapshot.filter([args.function])

    config = Config(old_snapshot, new_snapshot, args.show_diff,
                    args.output_llvm_ir, args.control_flow_only,
                    args.print_asm_diffs, args.verbose,
                    args.enable_simpll_ffi, args.semdiff_tool)

    # The filter pattern does not change between functions, compile it once.
    pattern = (re.compile(args.regex_filter)
               if args.regex_filter is not None else None)

    # The overall result is labelled with the old and the new snapshot.
    result = Result(Result.Kind.NONE, args.snapshot_dir_old,
                    args.snapshot_dir_new, start_time=default_timer())

    for group_name, group in sorted(old_snapshot.fun_groups.items()):
        group_printed = False

        # Set the group directory
        if output_dir is not None and group_name is not None:
            group_dir = os.path.join(output_dir, group_name)
        else:
            group_dir = None

        # The result graph and the function cache are shared by all
        # comparisons within one group.
        result_graph = None
        cache = SimpLLCache(mkdtemp())

        if args.enable_module_cache:
            module_cache = _generate_module_cache(group.functions.items(),
                                                  group_name,
                                                  new_snapshot, 3)
        else:
            module_cache = None

        for fun, old_fun_desc in sorted(group.functions.items()):
            # Check if the function exists in the other snapshot
            new_fun_desc = new_snapshot.get_by_name(fun, group_name)
            if not new_fun_desc:
                continue

            # Check if the module exists in both snapshots
            if old_fun_desc.mod is None or new_fun_desc.mod is None:
                result.add_inner(Result(Result.Kind.UNKNOWN, fun, fun))
                if group_name is not None and not group_printed:
                    print("{}:".format(group_name))
                    group_printed = True
                print("{}: unknown".format(fun))
                continue

            # If function has a global variable, set it
            glob_var = KernelParam(old_fun_desc.glob_var) \
                if old_fun_desc.glob_var else None

            # Run the semantic diff
            fun_result = functions_diff(mod_first=old_fun_desc.mod,
                                        mod_second=new_fun_desc.mod,
                                        fun_first=fun, fun_second=fun,
                                        glob_var=glob_var, config=config,
                                        prev_result_graph=result_graph,
                                        function_cache=cache,
                                        module_cache=module_cache)

            if fun_result is not None:
                # Read the graph only after the None check; the previous
                # graph is kept when no result was produced.
                result_graph = fun_result.graph

                if pattern is not None:
                    # Filter results by regex: unless some inner diff
                    # matches, the function is reported as syntactically
                    # equal.
                    for called_res in fun_result.inner.values():
                        if pattern.search(called_res.diff):
                            break
                    else:
                        fun_result.kind = Result.Kind.EQUAL_SYNTAX

                result.add_inner(fun_result)

                # Printing information about failures and non-equal
                # functions.
                if fun_result.kind in [Result.Kind.NOT_EQUAL,
                                       Result.Kind.ERROR,
                                       Result.Kind.UNKNOWN]:
                    if fun_result.kind == Result.Kind.NOT_EQUAL:
                        # Create the output directory if needed
                        if output_dir is not None:
                            if not os.path.isdir(output_dir):
                                os.mkdir(output_dir)
                        # Create the group directory or print the group name
                        # if needed
                        if group_dir is not None:
                            if not os.path.isdir(group_dir):
                                os.mkdir(group_dir)
                        elif group_name is not None and not group_printed:
                            print("{}:".format(group_name))
                            group_printed = True
                        print_syntax_diff(
                            snapshot_dir_old=args.snapshot_dir_old,
                            snapshot_dir_new=args.snapshot_dir_new,
                            fun=fun,
                            fun_result=fun_result,
                            fun_tag=old_fun_desc.tag,
                            output_dir=group_dir if group_dir else output_dir,
                            show_diff=args.show_diff,
                            initial_indent=2 if (group_name is not None and
                                                 group_dir is None) else 0)
                    else:
                        # Print the group name if needed
                        if group_name is not None and not group_printed:
                            print("{}:".format(group_name))
                            group_printed = True
                        print("{}: {}".format(fun, str(fun_result.kind)))

            # Clean LLVM modules (allow GC to collect the occupied memory)
            old_fun_desc.mod.clean_module()
            new_fun_desc.mod.clean_module()
            LlvmKernelModule.clean_all()

    old_snapshot.finalize()
    new_snapshot.finalize()

    # The directory only exists if at least one difference was stored.
    if output_dir is not None and os.path.isdir(output_dir):
        print("Differences stored in {}/".format(output_dir))

    if args.report_stat:
        print("")
        print("Statistics")
        print("----------")
        result.stop_time = default_timer()
        result.report_stat(args.show_errors)
    return 0
def compare(args):
    """
    Compare the functions listed in two kernel snapshots.

    The function lists are loaded from the functions.yaml file of each
    snapshot directory. Every function of the old snapshot that is present
    in both modules is compared; failures are printed together with the
    syntax diffs of non-equal functions.

    :param args: Parsed command-line arguments. Attributes read here:
                 snapshot_dir_old, snapshot_dir_new, function, show_diff,
                 control_flow_only, verbose, semdiff_tool, regex_filter,
                 report_stat, show_errors.
    :returns: 0 once the comparison has finished.
    """
    old_functions = FunctionList(args.snapshot_dir_old)
    with open(os.path.join(args.snapshot_dir_old, "functions.yaml"),
              "r") as fun_list_yaml:
        old_functions.from_yaml(fun_list_yaml.read())
    old_source = KernelSource(args.snapshot_dir_old)

    new_functions = FunctionList(args.snapshot_dir_new)
    with open(os.path.join(args.snapshot_dir_new, "functions.yaml"),
              "r") as fun_list_yaml:
        new_functions.from_yaml(fun_list_yaml.read())
    new_source = KernelSource(args.snapshot_dir_new)

    if args.function:
        old_functions.filter([args.function])
        new_functions.filter([args.function])

    config = Config(old_source, new_source, args.show_diff,
                    args.control_flow_only, args.verbose,
                    args.semdiff_tool)

    # The filter pattern does not change between functions, compile it once.
    pattern = (re.compile(args.regex_filter)
               if args.regex_filter is not None else None)

    # The overall result is labelled with the old and the new snapshot.
    result = Result(Result.Kind.NONE, args.snapshot_dir_old,
                    args.snapshot_dir_new)

    for fun, old_mod in sorted(old_functions.functions.items()):
        new_mod = new_functions.get_by_name(fun)
        # NOTE(review): get_by_name presumably yields None for a function
        # missing from the new list - confirm; the guard is harmless if not.
        if new_mod is None:
            continue
        if not (old_mod.has_function(fun) and new_mod.has_function(fun)):
            continue

        fun_result = functions_diff(
            mod_first=old_mod, mod_second=new_mod,
            fun_first=fun, fun_second=fun,
            glob_var=None, config=config)

        if fun_result is not None:
            if pattern is not None:
                # Filter results by regex: unless some inner diff matches,
                # the function is reported as syntactically equal.
                for called_res in fun_result.inner.values():
                    if pattern.search(called_res.diff):
                        break
                else:
                    fun_result.kind = Result.Kind.EQUAL_SYNTAX

            result.add_inner(fun_result)
            if fun_result.kind in [Result.Kind.ERROR, Result.Kind.UNKNOWN]:
                print("{}: {}".format(fun, str(fun_result.kind)))
            elif fun_result.kind == Result.Kind.NOT_EQUAL:
                print_syntax_diff(args.snapshot_dir_old,
                                  args.snapshot_dir_new,
                                  fun, fun_result, False,
                                  args.show_diff)

        # Clean LLVM modules (allow GC to collect the occupied memory)
        old_mod.clean_module()
        new_mod.clean_module()
        LlvmKernelModule.clean_all()

    if args.report_stat:
        print("")
        print("Statistics")
        print("----------")
        result.report_stat(args.show_errors)
    return 0