def __init__(self, spec, task_name, kernel_path):
    """
    Set up a task from its specification.

    :param spec: Mapping with the task specification. The "old_kernel" and
                 "new_kernel" keys are read unconditionally; the
                 "pattern_config" and "control_flow_only" keys are optional.
    :param task_name: Name of the task, also used as the name of its
                      directory.
    :param kernel_path: Directory onto which the kernel names from the
                        specification are joined.
    """
    old_dir = os.path.join(kernel_path, spec["old_kernel"])
    new_dir = os.path.join(kernel_path, spec["new_kernel"])
    self.old_kernel_dir = old_dir
    self.new_kernel_dir = new_dir
    self.name = task_name
    self.task_dir = os.path.join(tasks_path, task_name)

    # The pattern configuration is optional.
    if "pattern_config" not in spec:
        self.pattern_config = None
    else:
        pattern_file = os.path.join(patterns_path, spec["pattern_config"])
        self.pattern_config = PatternConfig.create_from_file(
            path=pattern_file, patterns_path=base_path)

    # Fall back to False when the specification does not say otherwise.
    self.control_flow_only = (spec["control_flow_only"]
                              if "control_flow_only" in spec else False)

    # Create LLVM sources and configuration
    self.old_kernel = KernelSourceTree(old_dir, KernelLlvmSourceBuilder)
    self.new_kernel = KernelSourceTree(new_dir, KernelLlvmSourceBuilder)
    # Each snapshot is built from the same source tree passed twice.
    self.old_snapshot = Snapshot(self.old_kernel, self.old_kernel)
    self.new_snapshot = Snapshot(self.new_kernel, self.new_kernel)
    self.config = Config(
        self.old_snapshot,
        self.new_snapshot,
        False,
        False,
        self.pattern_config,
        self.control_flow_only,
        False,
        False,
        False,
        None,
    )

    self.functions = dict()
def test_load_snapshot_from_dir_functions():
    """
    Parse a snapshot stored in a temporary directory.

    The YAML configuration file holds a plain list of functions, so the
    loaded snapshot is expected to have exactly one group, keyed by None,
    that holds both functions. Every parsed LLVM path has to be rooted in
    the snapshot directory.
    """
    with TemporaryDirectory(prefix="test_snapshots_") as snap_dir, \
            NamedTemporaryFile(mode="w+t",
                               prefix="snapshot_",
                               suffix=".yaml",
                               dir=snap_dir) as config_file:
        # Populate the temporary snapshot configuration file.
        config_file.writelines("""
        - created_time: 2020-01-01 00:00:00.000001+00:00
          diffkemp_version: '0.1'
          kind: function_list
          list:
          - glob_var: null
            llvm: net/core/skbuff.ll
            name: ___pskb_trim
            tag: null
          - glob_var: null
            llvm: mm/page_alloc.ll
            name: __alloc_pages_nodemask
            tag: null
          llvm_source_finder:
            kind: kernel_with_builder
            path: null
          source_dir: /diffkemp/kernel/linux-3.10.0-957.el7
          llvm_version: 13
        """)

        # Load the temporary snapshot configuration file.
        config_file.seek(0)
        yaml_name = os.path.basename(config_file.name)
        snap = Snapshot.load_from_dir(snap_dir, yaml_name)

        # Snapshot metadata.
        assert str(snap.created_time) == "2020-01-01 00:00:00.000001+00:00"
        tree = snap.snapshot_tree
        assert isinstance(tree, SourceTree)
        assert isinstance(tree.source_finder, KernelLlvmSourceBuilder)
        assert tree.source_dir == snap_dir

        # A single "None" group with both functions.
        assert len(snap.fun_groups) == 1
        assert None in snap.fun_groups
        functions = snap.fun_groups[None].functions
        assert len(functions) == 2
        assert set(functions.keys()) == \
            {"___pskb_trim", "__alloc_pages_nodemask"}

        # LLVM path suffix expected for each function.
        expected_llvm = {
            "___pskb_trim": "/net/core/skbuff.ll",
            "__alloc_pages_nodemask": "/mm/page_alloc.ll",
        }
        for fun_name, fun_desc in functions.items():
            assert fun_desc.glob_var is None
            assert fun_desc.tag is None
            if fun_name in expected_llvm:
                assert os.path.abspath(fun_desc.mod.llvm) == \
                    snap_dir + expected_llvm[fun_name]
def test_create_snapshot_from_source():
    """Check the initial state of snapshots created from a kernel dir."""
    kernel_dir = "kernel/linux-3.10.0-957.el7"
    output_dir = "snapshots/linux-3.10.0-957.el7"

    # Without a function kind there is a single, empty "None" group.
    snapshot = Snapshot.create_from_source(kernel_dir, output_dir, None,
                                           False)
    source, target = snapshot.kernel_source, snapshot.snapshot_source
    assert source is not None
    assert kernel_dir in source.kernel_dir
    assert target is not None
    assert output_dir in target.kernel_dir
    assert snapshot.fun_kind is None
    assert len(snapshot.fun_groups) == 1
    assert len(snapshot.fun_groups[None].functions) == 0

    # With the "sysctl" kind there are no groups to start with.
    snapshot = Snapshot.create_from_source(kernel_dir, output_dir, "sysctl",
                                           False)
    assert snapshot.fun_kind == "sysctl"
    assert len(snapshot.fun_groups) == 0
def test_load_snapshot_from_dir_sysctls():
    """
    Parse a sysctl snapshot stored in a temporary directory.

    The YAML configuration file holds a list of sysctl groups with one
    function each. The loaded snapshot is expected to contain both groups,
    and every parsed LLVM path has to be rooted in the snapshot directory.
    """
    with TemporaryDirectory(prefix="test_snapshots_sysctl_") as snap_dir, \
            NamedTemporaryFile(mode="w+t",
                               prefix="snapshot_",
                               suffix=".yaml",
                               dir=snap_dir) as config_file:
        # Populate the temporary sysctl snapshot configuration file.
        config_file.writelines("""
        - created_time: 2020-01-01 00:00:00.000001
          diffkemp_version: '0.1'
          kind: function_list
          list:
          - functions:
            - glob_var: null
              llvm: kernel/sched/fair.ll
              name: sched_proc_update_handler
              tag: proc handler
            sysctl: kernel.sched_latency_ns
          - functions:
            - glob_var: null
              llvm: kernel/sysctl.ll
              name: proc_dointvec_minmax
              tag: proc handler
            sysctl: kernel.timer_migration
          source_kernel_dir: /diffkemp/kernel/linux-3.10.0-957.el7
        """)

        # Load the temporary sysctl snapshot configuration file.
        config_file.seek(0)
        yaml_name = os.path.basename(config_file.name)
        snap = Snapshot.load_from_dir(snap_dir, yaml_name)

        assert str(snap.created_time) == "2020-01-01 00:00:00.000001"
        assert len(snap.fun_groups) == 2
        assert set(snap.fun_groups.keys()) == {
            "kernel.sched_latency_ns", "kernel.timer_migration"
        }

        # Group name -> (its only function, expected LLVM path suffix).
        expected = {
            "kernel.sched_latency_ns":
                ("sched_proc_update_handler", "/kernel/sched/fair.ll"),
            "kernel.timer_migration":
                ("proc_dointvec_minmax", "/kernel/sysctl.ll"),
        }
        for group_name, group in snap.fun_groups.items():
            assert len(group.functions) == 1
            fun_name, llvm_suffix = expected[group_name]
            assert group.functions.keys() == {fun_name}
            fun_desc = group.functions[fun_name]
            assert os.path.abspath(fun_desc.mod.llvm) == \
                snap_dir + llvm_suffix
            assert fun_desc.tag == "proc handler"
            assert fun_desc.glob_var is None
def __init__(self, spec, task_name, tasks_path, kernel_path):
    """
    Set up a task from its specification.

    :param spec: Mapping with the task specification. The "old_kernel" and
                 "new_kernel" keys are read unconditionally; the
                 "control_flow_only" key is optional.
    :param task_name: Name of the task, also used as the name of its
                      directory.
    :param tasks_path: Directory onto which the task name is joined.
    :param kernel_path: Directory onto which the kernel names from the
                        specification are joined.
    """
    old_dir = os.path.join(kernel_path, spec["old_kernel"])
    new_dir = os.path.join(kernel_path, spec["new_kernel"])
    self.old_kernel_dir = old_dir
    self.new_kernel_dir = new_dir
    self.name = task_name
    self.task_dir = os.path.join(tasks_path, task_name)

    # Fall back to False when the specification does not say otherwise.
    self.control_flow_only = (spec["control_flow_only"]
                              if "control_flow_only" in spec else False)

    # Create LLVM sources and configuration
    self.old_kernel = KernelSource(old_dir, True)
    self.new_kernel = KernelSource(new_dir, True)
    # Each snapshot is built from the same kernel source passed twice.
    self.old_snapshot = Snapshot(self.old_kernel, self.old_kernel)
    self.new_snapshot = Snapshot(self.new_kernel, self.new_kernel)
    self.config = Config(
        self.old_snapshot,
        self.new_snapshot,
        False,
        self.control_flow_only,
        False,
        False,
        None,
    )

    self.functions = dict()
def test_to_yaml_sysctls():
    """
    Serialise a snapshot with several sysctl groups into YAML.

    The dumped document is expected to be a one-element list whose only
    item has seven top-level entries, among them the creation time, the
    LLVM source finder kind and the list of groups. Each group carries its
    sysctl name and a function list with the "name", "llvm", "glob_var"
    and "tag" fields. The LLVM paths must be relative to the snapshot
    directory.
    """
    kernel_dir = "kernel/linux-3.10.0-957.el7"
    output_dir = "snapshots-sysctl/linux-3.10.0-957.el7"
    snap = Snapshot.create_from_source(kernel_dir, output_dir,
                                       KernelLlvmSourceBuilder, None,
                                       "sysctl", False)

    snap.add_fun_group("kernel.sched_latency_ns")
    snap.add_fun_group("kernel.timer_migration")

    fair_mod = LlvmModule("snapshots-sysctl/linux-3.10.0-957.el7/"
                          "kernel/sched/fair.ll")
    snap.add_fun("sched_proc_update_handler", fair_mod, None,
                 "proc handler", "kernel.sched_latency_ns")
    sysctl_mod = LlvmModule(
        "snapshots-sysctl/linux-3.10.0-957.el7/kernel/sysctl.ll")
    snap.add_fun("proc_dointvec_minmax", sysctl_mod, None,
                 "proc handler", "kernel.timer_migration")

    documents = yaml.safe_load(snap.to_yaml())
    assert len(documents) == 1
    dumped = documents[0]
    assert len(dumped) == 7
    assert dumped["llvm_source_finder"]["kind"] == "kernel_with_builder"
    assert isinstance(dumped["created_time"], datetime.datetime)

    groups = dumped["list"]
    assert len(groups) == 2
    assert {group["sysctl"] for group in groups} == \
        {"kernel.sched_latency_ns", "kernel.timer_migration"}

    # The single function expected in each sysctl group.
    expected_function = {
        "kernel.sched_latency_ns": {
            "name": "sched_proc_update_handler",
            "llvm": "kernel/sched/fair.ll",
            "glob_var": None,
            "tag": "proc handler"
        },
        "kernel.timer_migration": {
            "name": "proc_dointvec_minmax",
            "llvm": "kernel/sysctl.ll",
            "glob_var": None,
            "tag": "proc handler"
        },
    }
    for group in groups:
        assert len(group["functions"]) == 1
        if group["sysctl"] in expected_function:
            assert group["functions"][0] == \
                expected_function[group["sysctl"]]
def test_add_sysctl_fun_group():
    """Adding a group to a sysctl snapshot registers it under its name."""
    group_name = "kernel.sched_latency_ns"
    snapshot = Snapshot.create_from_source(
        "kernel/linux-3.10.0-957.el7",
        "snapshots/linux-3.10.0-957.el7",
        "sysctl",
        False)

    snapshot.add_fun_group(group_name)

    assert len(snapshot.fun_groups) == 1
    assert group_name in snapshot.fun_groups
def test_filter():
    """Filtering keeps only the listed functions in the snapshot."""
    snapshot = Snapshot.create_from_source(
        "kernel/linux-3.10.0-957.el7",
        "snapshots/linux-3.10.0-957.el7",
        None,
        False)

    snapshot.add_fun("___pskb_trim", LlvmKernelModule("net/core/skbuff.ll"))
    snapshot.add_fun("__alloc_pages_nodemask",
                     LlvmKernelModule("mm/page_alloc.ll"))

    snapshot.filter(["__alloc_pages_nodemask"])

    # Only the function named in the filter may remain.
    remaining = snapshot.fun_groups[None].functions
    assert len(remaining) == 1
    assert "___pskb_trim" not in remaining
    assert "__alloc_pages_nodemask" in remaining
def test_add_fun_none_group():
    """A function added without a group ends up in the None group."""
    fun_name = "___pskb_trim"
    snapshot = Snapshot.create_from_source(
        "kernel/linux-3.10.0-957.el7",
        "snapshots/linux-3.10.0-957.el7",
        None,
        False)

    module = LlvmKernelModule("net/core/skbuff.ll")
    snapshot.add_fun(fun_name, module)

    assert fun_name in snapshot.fun_groups[None].functions
    # The stored descriptor keeps the very same module object; the global
    # variable and the tag were not given, so both are None.
    desc = snapshot.fun_groups[None].functions[fun_name]
    assert desc.mod is module
    assert desc.glob_var is None
    assert desc.tag is None
def test_get_by_name_functions():
    """Looking a function up by name returns the module it was added with."""
    snapshot = Snapshot.create_from_source(
        "kernel/linux-3.10.0-957.el7",
        "snapshots/linux-3.10.0-957.el7",
        None,
        False)

    added = [
        ("___pskb_trim", LlvmKernelModule("net/core/skbuff.ll")),
        ("__alloc_pages_nodemask", LlvmKernelModule("mm/page_alloc.ll")),
    ]
    for fun_name, module in added:
        snapshot.add_fun(fun_name, module)

    # Every lookup has to hand back the identical module object.
    for fun_name, module in added:
        found = snapshot.get_by_name(fun_name)
        assert found.mod is module
def test_add_fun_sysctl_group():
    """A function added to a sysctl group keeps its module, var and tag."""
    group_name = "kernel.sched_latency_ns"
    fun_name = "sched_debug_header"
    snapshot = Snapshot.create_from_source(
        "kernel/linux-3.10.0-957.el7",
        "snapshots/linux-3.10.0-957.el7",
        "sysctl",
        False)

    snapshot.add_fun_group(group_name)
    module = LlvmKernelModule("kernel/sched/debug.ll")
    snapshot.add_fun(fun_name, module, "sysctl_sched_latency",
                     "using_data_variable \"sysctl_sched_latency\"",
                     group_name)

    assert fun_name in snapshot.fun_groups[group_name].functions
    desc = snapshot.fun_groups[group_name].functions[fun_name]
    assert desc.mod is module
    assert desc.glob_var == "sysctl_sched_latency"
    assert desc.tag == "using_data_variable \"sysctl_sched_latency\""
def test_to_yaml_functions():
    """
    Serialise a snapshot with a single "None" group into YAML.

    The dumped document is expected to be a one-element list whose only
    item has seven top-level entries, among them the creation time, the
    LLVM source finder kind and a flat list of functions. The LLVM path of
    each function must be relative to the snapshot directory.
    """
    kernel_dir = "kernel/linux-3.10.0-957.el7"
    output_dir = "snapshots/linux-3.10.0-957.el7"
    snap = Snapshot.create_from_source(kernel_dir, output_dir,
                                       KernelLlvmSourceBuilder, None,
                                       None, False)

    skbuff_mod = LlvmModule(
        "snapshots/linux-3.10.0-957.el7/net/core/skbuff.ll")
    snap.add_fun("___pskb_trim", skbuff_mod)
    page_alloc_mod = LlvmModule(
        "snapshots/linux-3.10.0-957.el7/mm/page_alloc.ll")
    snap.add_fun("__alloc_pages_nodemask", page_alloc_mod)

    documents = yaml.safe_load(snap.to_yaml())
    assert len(documents) == 1
    dumped = documents[0]
    assert len(dumped) == 7
    assert isinstance(dumped["created_time"], datetime.datetime)
    assert dumped["llvm_source_finder"]["kind"] == "kernel_with_builder"

    functions = dumped["list"]
    assert len(functions) == 2
    assert {entry["name"] for entry in functions} == \
        {"___pskb_trim", "__alloc_pages_nodemask"}

    # Relative LLVM path expected for each function.
    expected_llvm = {
        "___pskb_trim": "net/core/skbuff.ll",
        "__alloc_pages_nodemask": "mm/page_alloc.ll",
    }
    for entry in functions:
        if entry["name"] in expected_llvm:
            assert entry["llvm"] == expected_llvm[entry["name"]]
def test_get_by_name_sysctls():
    """Look a function up by its name together with its sysctl group."""
    latency_group = "kernel.sched_latency_ns"
    migration_group = "kernel.timer_migration"
    snapshot = Snapshot.create_from_source(
        "kernel/linux-3.10.0-957.el7",
        "snapshots/linux-3.10.0-957.el7",
        "sysctl",
        False)

    snapshot.add_fun_group(latency_group)
    snapshot.add_fun_group(migration_group)

    fair_module = LlvmKernelModule(
        "snapshots-sysctl/linux-3.10.0-957.el7/kernel/sched/fair.ll")
    sysctl_module = LlvmKernelModule(
        "snapshots-sysctl/linux-3.10.0-957.el7/kernel/sysctl.ll")
    snapshot.add_fun("sched_proc_update_handler", fair_module, None,
                     "proc handler", latency_group)
    snapshot.add_fun("proc_dointvec_minmax", sysctl_module, None,
                     "proc handler", migration_group)

    # The function must not be found in a group it was not added to...
    missing = snapshot.get_by_name("proc_dointvec_minmax", latency_group)
    assert missing is None

    # ...while the lookup in its own group returns its module.
    found = snapshot.get_by_name("proc_dointvec_minmax", migration_group)
    assert found.mod is sysctl_module
def test_get_modules():
    """
    Collect the modules of all functions in the snapshot.

    With functions spread over several groups, the snapshot is expected to
    return the modules of all the groups.
    """
    snapshot = Snapshot.create_from_source(
        "kernel/linux-3.10.0-957.el7",
        "snapshots/linux-3.10.0-957.el7",
        "sysctl",
        False)

    snapshot.add_fun_group("kernel.sched_latency_ns")
    snapshot.add_fun_group("kernel.timer_migration")

    fair_module = LlvmKernelModule("kernel/sched/fair.ll")
    snapshot.add_fun("sched_proc_update_handler", fair_module, None,
                     "proc_handler", "kernel.sched_latency_ns")
    sysctl_module = LlvmKernelModule("kernel/sysctl.ll")
    snapshot.add_fun("proc_dointvec_minmax", sysctl_module, None,
                     "proc_handler", "kernel.timer_migration")

    all_modules = snapshot.modules()
    assert len(all_modules) == 2
    llvm_paths = {module.llvm for module in all_modules}
    assert llvm_paths == {"kernel/sched/fair.ll", "kernel/sysctl.ll"}
def compare(args):
    """
    Compare snapshots of linux kernels. Runs the semantic comparison and shows
    information about the compared functions that are semantically different.

    :param args: Parsed command-line arguments. The attributes read here are
                 snapshot_dir_old, snapshot_dir_new, stdout, output_dir,
                 function, show_diff, output_llvm_ir, control_flow_only,
                 print_asm_diffs, verbose, enable_simpll_ffi, semdiff_tool,
                 enable_module_cache, regex_filter, report_stat and
                 show_errors.
    :return: 0 when the comparison ran to the end. The process exits with
             errno.EEXIST when an explicitly given output directory already
             exists.
    """
    # Parse both the new and the old snapshot.
    old_snapshot = Snapshot.load_from_dir(args.snapshot_dir_old)
    new_snapshot = Snapshot.load_from_dir(args.snapshot_dir_new)

    # Set the output directory
    if not args.stdout:
        if args.output_dir:
            output_dir = args.output_dir
            if os.path.isdir(output_dir):
                sys.stderr.write("Error: output directory exists\n")
                sys.exit(errno.EEXIST)
        else:
            output_dir = default_output_dir(args.snapshot_dir_old,
                                            args.snapshot_dir_new)
    else:
        output_dir = None

    if args.function:
        old_snapshot.filter([args.function])
        new_snapshot.filter([args.function])

    config = Config(old_snapshot, new_snapshot, args.show_diff,
                    args.output_llvm_ir, args.control_flow_only,
                    args.print_asm_diffs, args.verbose,
                    args.enable_simpll_ffi, args.semdiff_tool)
    # The overall result is named after the old and the new snapshot.
    result = Result(Result.Kind.NONE, args.snapshot_dir_old,
                    args.snapshot_dir_new, start_time=default_timer())

    # The filter pattern does not change between functions: compile it once.
    if args.regex_filter is not None:
        regex_pattern = re.compile(args.regex_filter)
    else:
        regex_pattern = None

    for group_name, group in sorted(old_snapshot.fun_groups.items()):
        group_printed = False

        # Set the group directory
        if output_dir is not None and group_name is not None:
            group_dir = os.path.join(output_dir, group_name)
        else:
            group_dir = None

        result_graph = None
        cache = SimpLLCache(mkdtemp())

        if args.enable_module_cache:
            module_cache = _generate_module_cache(group.functions.items(),
                                                  group_name,
                                                  new_snapshot,
                                                  3)
        else:
            module_cache = None

        for fun, old_fun_desc in sorted(group.functions.items()):
            # Check if the function exists in the other snapshot
            new_fun_desc = new_snapshot.get_by_name(fun, group_name)
            if not new_fun_desc:
                continue

            # Check if the module exists in both snapshots
            if old_fun_desc.mod is None or new_fun_desc.mod is None:
                result.add_inner(Result(Result.Kind.UNKNOWN, fun, fun))
                if group_name is not None and not group_printed:
                    print("{}:".format(group_name))
                    group_printed = True
                print("{}: unknown".format(fun))
                continue

            # If function has a global variable, set it
            glob_var = KernelParam(old_fun_desc.glob_var) \
                if old_fun_desc.glob_var else None

            # Run the semantic diff
            fun_result = functions_diff(
                mod_first=old_fun_desc.mod, mod_second=new_fun_desc.mod,
                fun_first=fun, fun_second=fun,
                glob_var=glob_var, config=config,
                prev_result_graph=result_graph, function_cache=cache,
                module_cache=module_cache)

            if fun_result is not None:
                # Only a real result carries a graph to pass to the next
                # comparison; reading it before the None check would raise.
                result_graph = fun_result.graph

                if regex_pattern is not None:
                    # Filter results by regex: when no inner diff matches
                    # the pattern, the function is treated as syntactically
                    # equal.
                    for called_res in fun_result.inner.values():
                        if regex_pattern.search(called_res.diff):
                            break
                    else:
                        fun_result.kind = Result.Kind.EQUAL_SYNTAX

                result.add_inner(fun_result)

                # Printing information about failures and non-equal functions.
                if fun_result.kind in [Result.Kind.NOT_EQUAL,
                                       Result.Kind.ERROR,
                                       Result.Kind.UNKNOWN]:
                    if fun_result.kind == Result.Kind.NOT_EQUAL:
                        # Create the output directory if needed
                        if output_dir is not None:
                            if not os.path.isdir(output_dir):
                                os.mkdir(output_dir)
                        # Create the group directory or print the group name
                        # if needed
                        if group_dir is not None:
                            if not os.path.isdir(group_dir):
                                os.mkdir(group_dir)
                        elif group_name is not None and not group_printed:
                            print("{}:".format(group_name))
                            group_printed = True
                        print_syntax_diff(
                            snapshot_dir_old=args.snapshot_dir_old,
                            snapshot_dir_new=args.snapshot_dir_new,
                            fun=fun,
                            fun_result=fun_result,
                            fun_tag=old_fun_desc.tag,
                            output_dir=group_dir if group_dir else output_dir,
                            show_diff=args.show_diff,
                            initial_indent=2 if (group_name is not None and
                                                 group_dir is None) else 0)
                    else:
                        # Print the group name if needed
                        if group_name is not None and not group_printed:
                            print("{}:".format(group_name))
                            group_printed = True
                        print("{}: {}".format(fun, str(fun_result.kind)))

            # Clean LLVM modules (allow GC to collect the occupied memory)
            old_fun_desc.mod.clean_module()
            new_fun_desc.mod.clean_module()
            LlvmKernelModule.clean_all()

    old_snapshot.finalize()
    new_snapshot.finalize()

    if output_dir is not None and os.path.isdir(output_dir):
        print("Differences stored in {}/".format(output_dir))

    if args.report_stat:
        print("")
        print("Statistics")
        print("----------")
        result.stop_time = default_timer()
        result.report_stat(args.show_errors)
    return 0
def generate(args):
    """
    Generate snapshot of sources of kernel functions.
    This involves:
      - find source code with functions definitions
      - compile the source codes into LLVM IR
      - copy LLVM and C source files into snapshot directory
      - create YAML with list mapping functions to their LLVM sources

    :param args: Parsed command-line arguments. The attributes read here are
                 kernel_dir, output_dir, sysctl and functions_list (path to a
                 text file with one symbol per line).
    """
    # Create a new snapshot from the source directory.
    snapshot = Snapshot.create_from_source(
        args.kernel_dir, args.output_dir,
        "sysctl" if args.sysctl else None)
    source = snapshot.kernel_source

    # Build sources for symbols from the list into LLVM IR
    with open(args.functions_list, "r") as fun_list_file:
        for line in fun_list_file:
            symbol = line.strip()
            # Skip blank lines and lines that do not start like an
            # identifier.
            if not symbol or not (symbol[0].isalpha() or symbol[0] == "_"):
                continue
            if args.sysctl:
                # For a sysctl parameter, we have to:
                #  - get LLVM of a file which defines the sysctl option
                #  - find and compile proc handler function and add it to the
                #    snapshot
                #  - find sysctl data variable
                #  - find, compile, and add to snapshot all functions that
                #    use the data variable

                # Get module with sysctl definitions
                try:
                    sysctl_mod = source.get_sysctl_module(symbol)
                except SourceNotFoundException:
                    print("{}: sysctl not supported".format(symbol))
                    # Without the module there is nothing to parse; going on
                    # would use an unbound (or a stale) sysctl_mod.
                    continue

                # Iterate all sysctls represented by the symbol (it can be
                # a pattern).
                sysctl_list = sysctl_mod.parse_sysctls(symbol)
                if not sysctl_list:
                    print("{}: no sysctl found".format(symbol))
                    continue
                for sysctl in sysctl_list:
                    print("{}:".format(sysctl))
                    # New group in function list for the sysctl
                    snapshot.add_fun_group(sysctl)

                    # Proc handler function for sysctl
                    proc_fun = sysctl_mod.get_proc_fun(sysctl)
                    if proc_fun:
                        try:
                            proc_fun_mod = source.get_module_for_symbol(
                                proc_fun)
                            snapshot.add_fun(name=proc_fun,
                                             llvm_mod=proc_fun_mod,
                                             glob_var=None,
                                             tag="proc handler function",
                                             group=sysctl)
                            print(" {}: {} (proc handler)".format(
                                proc_fun,
                                os.path.relpath(proc_fun_mod.llvm,
                                                args.kernel_dir)))
                        except SourceNotFoundException:
                            print(" could not build proc handler")

                    # Functions using the sysctl data variable
                    data = sysctl_mod.get_data(sysctl)
                    if not data:
                        continue
                    for data_src in source.find_srcs_using_symbol(data.name):
                        data_mod = source.get_module_from_source(data_src)
                        if not data_mod:
                            continue
                        for data_fun in \
                                data_mod.get_functions_using_param(data):
                            # The proc handler is handled separately above.
                            if data_fun == proc_fun:
                                continue
                            snapshot.add_fun(
                                name=data_fun,
                                llvm_mod=data_mod,
                                glob_var=data.name,
                                tag="function using sysctl data "
                                    "variable \"{}\"".format(data.name),
                                group=sysctl)
                            print(
                                " {}: {} (using data variable \"{}\")".format(
                                    data_fun,
                                    os.path.relpath(data_mod.llvm,
                                                    args.kernel_dir),
                                    data.name))
            else:
                try:
                    # For a normal function, we compile its source and include
                    # it into the snapshot
                    sys.stdout.write("{}: ".format(symbol))
                    llvm_mod = source.get_module_for_symbol(symbol)
                    if not llvm_mod.has_function(symbol):
                        print("unsupported")
                        continue
                    print(os.path.relpath(llvm_mod.llvm, args.kernel_dir))
                    snapshot.add_fun(symbol, llvm_mod)
                except SourceNotFoundException:
                    print("source not found")
                    # Keep the symbol in the snapshot, with no module.
                    snapshot.add_fun(symbol, None)

    snapshot.generate_snapshot_dir()
    snapshot.finalize()