Beispiel #1
0
def modules_diff(mod_first, mod_second, glob_var, fun, config):
    """
    Compute the semantic difference of two LLVM IR modules.

    The set of compared functions is chosen as follows: if a function is
    given, only that function is compared; otherwise, if a global variable is
    given, the functions using it (as reported by each module) are compared;
    otherwise nothing is compared.
    :param mod_first: First LLVM module
    :param mod_second: Second LLVM module
    :param glob_var: Parameter (global variable) whose users are compared
                     when no function is specified.
    :param fun: Function to be compared.
    :param config: Configuration.
    :returns: Result whose inner results are the per-function comparisons.
              Its kind is set to ERROR (and the comparison stops) as soon as
              a selected function is missing in one of the modules.
    """
    result = Result(Result.Kind.NONE, mod_first, mod_second)

    # Select the candidate functions on both sides.
    if fun:
        candidates_first, candidates_second = {fun}, {fun}
    elif glob_var:
        candidates_first = mod_first.get_functions_using_param(glob_var)
        candidates_second = mod_second.get_functions_using_param(glob_var)
    else:
        candidates_first, candidates_second = [], []

    for name in candidates_first:
        # Only functions selected on both sides are compared.
        if name in candidates_second:
            in_both_modules = (mod_first.has_function(name)
                               and mod_second.has_function(name))
            if not in_both_modules:
                print("    Given function not found in module")
                result.kind = Result.Kind.ERROR
                return result

            result.add_inner(
                functions_diff(mod_first=mod_first,
                               mod_second=mod_second,
                               fun_first=name,
                               fun_second=name,
                               glob_var=glob_var,
                               config=config))

    return result
Beispiel #2
0
def compare(args):
    """
    Compare snapshots of linux kernels. Runs the semantic comparison and shows
    information about the compared functions that are semantically different.

    Functions are processed group by group, in sorted order. For every
    function found in both snapshots, functions_diff is run; the result graph
    of the previous comparison within the group is passed to the next one.
    Non-equal functions are reported through print_syntax_diff, errors and
    unknown results are printed directly.

    :param args: Parsed command-line arguments. Attributes read here:
                 snapshot_dir_old, snapshot_dir_new, stdout, output_dir,
                 function, show_diff, output_llvm_ir, control_flow_only,
                 print_asm_diffs, verbose, enable_simpll_ffi, semdiff_tool,
                 enable_module_cache, regex_filter, report_stat, show_errors.
    :returns: 0. The process exits with errno.EEXIST if the explicitly
              requested output directory already exists.
    """
    # Parse both the new and the old snapshot.
    old_snapshot = Snapshot.load_from_dir(args.snapshot_dir_old)
    new_snapshot = Snapshot.load_from_dir(args.snapshot_dir_new)

    # Set the output directory
    if not args.stdout:
        if args.output_dir:
            output_dir = args.output_dir
            if os.path.isdir(output_dir):
                sys.stderr.write("Error: output directory exists\n")
                sys.exit(errno.EEXIST)
        else:
            output_dir = default_output_dir(args.snapshot_dir_old,
                                            args.snapshot_dir_new)
    else:
        output_dir = None

    if args.function:
        old_snapshot.filter([args.function])
        new_snapshot.filter([args.function])

    config = Config(old_snapshot, new_snapshot, args.show_diff,
                    args.output_llvm_ir, args.control_flow_only,
                    args.print_asm_diffs, args.verbose, args.enable_simpll_ffi,
                    args.semdiff_tool)
    # The overall result is labelled with the old and the new snapshot
    # (the new snapshot directory was previously never used here).
    result = Result(Result.Kind.NONE,
                    args.snapshot_dir_old,
                    args.snapshot_dir_new,
                    start_time=default_timer())

    # The filter regex does not change between functions: compile it once.
    pattern = (re.compile(args.regex_filter)
               if args.regex_filter is not None else None)

    for group_name, group in sorted(old_snapshot.fun_groups.items()):
        group_printed = False

        # Set the group directory
        if output_dir is not None and group_name is not None:
            group_dir = os.path.join(output_dir, group_name)
        else:
            group_dir = None

        result_graph = None
        cache = SimpLLCache(mkdtemp())

        if args.enable_module_cache:
            module_cache = _generate_module_cache(group.functions.items(),
                                                  group_name, new_snapshot, 3)
        else:
            module_cache = None

        for fun, old_fun_desc in sorted(group.functions.items()):
            # Check if the function exists in the other snapshot
            new_fun_desc = new_snapshot.get_by_name(fun, group_name)
            if not new_fun_desc:
                continue

            # Check if the module exists in both snapshots
            if old_fun_desc.mod is None or new_fun_desc.mod is None:
                result.add_inner(Result(Result.Kind.UNKNOWN, fun, fun))
                if group_name is not None and not group_printed:
                    print("{}:".format(group_name))
                    group_printed = True
                print("{}: unknown".format(fun))
                continue

            # If function has a global variable, set it
            glob_var = KernelParam(old_fun_desc.glob_var) \
                if old_fun_desc.glob_var else None

            # Run the semantic diff
            fun_result = functions_diff(mod_first=old_fun_desc.mod,
                                        mod_second=new_fun_desc.mod,
                                        fun_first=fun,
                                        fun_second=fun,
                                        glob_var=glob_var,
                                        config=config,
                                        prev_result_graph=result_graph,
                                        function_cache=cache,
                                        module_cache=module_cache)

            if fun_result is not None:
                # Remember the graph for the next comparison in this group.
                # Done only after the None check so that a missing result
                # cannot raise AttributeError.
                result_graph = fun_result.graph

                if pattern is not None:
                    # Filter results by regex: if no inner diff matches the
                    # pattern, the function is reported as equal.
                    for called_res in fun_result.inner.values():
                        if pattern.search(called_res.diff):
                            break
                    else:
                        fun_result.kind = Result.Kind.EQUAL_SYNTAX

                result.add_inner(fun_result)

                # Printing information about failures and non-equal functions.
                if fun_result.kind in [
                        Result.Kind.NOT_EQUAL, Result.Kind.ERROR,
                        Result.Kind.UNKNOWN
                ]:
                    if fun_result.kind == Result.Kind.NOT_EQUAL:
                        # Create the output directory if needed
                        if output_dir is not None:
                            if not os.path.isdir(output_dir):
                                os.mkdir(output_dir)
                        # Create the group directory or print the group name
                        # if needed
                        if group_dir is not None:
                            if not os.path.isdir(group_dir):
                                os.mkdir(group_dir)
                        elif group_name is not None and not group_printed:
                            print("{}:".format(group_name))
                            group_printed = True
                        print_syntax_diff(
                            snapshot_dir_old=args.snapshot_dir_old,
                            snapshot_dir_new=args.snapshot_dir_new,
                            fun=fun,
                            fun_result=fun_result,
                            fun_tag=old_fun_desc.tag,
                            output_dir=group_dir if group_dir else output_dir,
                            show_diff=args.show_diff,
                            initial_indent=2 if (group_name is not None
                                                 and group_dir is None) else 0)
                    else:
                        # Print the group name if needed
                        if group_name is not None and not group_printed:
                            print("{}:".format(group_name))
                            group_printed = True
                        print("{}: {}".format(fun, str(fun_result.kind)))

            # Clean LLVM modules (allow GC to collect the occupied memory)
            old_fun_desc.mod.clean_module()
            new_fun_desc.mod.clean_module()
            LlvmKernelModule.clean_all()

    old_snapshot.finalize()
    new_snapshot.finalize()

    if output_dir is not None and os.path.isdir(output_dir):
        print("Differences stored in {}/".format(output_dir))

    if args.report_stat:
        print("")
        print("Statistics")
        print("----------")
        result.stop_time = default_timer()
        result.report_stat(args.show_errors)
    return 0
def _run_llreve_z3(first, second, funFirst, funSecond, coupled, timeout,
                   verbose):
    """
    Run the comparison of semantics of two functions using the llreve tool and
    the Z3 SMT solver. The llreve tool takes compared functions in LLVM IR and
    generates a formula in first-order predicate logic. The formula is then
    solved using the Z3 solver. If it is unsatisfiable, the compared functions
    are semantically the same, otherwise, they are different.

    The generated formula is in the theory of bitvectors.

    :param first: File with the first LLVM module
    :param second: File with the second LLVM module
    :param funFirst: Function from the first module to be compared
    :param funSecond: Function from the second module to be compared
    :param coupled: List of coupled functions (functions that are supposed to
                    correspond to each other in both modules). These are needed
                    for functions not having definitions.
    :param timeout: Timeout for the analysis in seconds
    :param verbose: Verbosity option
    :returns: Result for the pair of modules. Its kind is NOT_EQUAL, EQUAL or
              UNKNOWN according to the Z3 verdict ("sat", "unsat",
              "unknown"), ERROR if Z3 printed no verdict or exited with a
              non-zero code, and TIMEOUT if the timer has expired.
    """
    # Imported locally so the block does not rely on DEVNULL being part of the
    # module-level subprocess imports.
    from subprocess import DEVNULL

    # Discard the tools' diagnostics unless running verbosely. DEVNULL avoids
    # opening (and leaking) a /dev/null file object on every call.
    stderr = None if verbose else DEVNULL

    # Commands for running llreve and Z3 (output of llreve is piped into Z3)
    command = [
        "build/llreve/reve/reve/llreve", first, second,
        "--fun=" + funFirst + "," + funSecond, "-muz", "--ir-input",
        "--bitvect", "--infer-marks", "--disable-auto-coupling"
    ]
    for c in coupled:
        command.append("--couple-functions={},{}".format(c[0], c[1]))

    if verbose:
        sys.stderr.write(" ".join(command) + "\n")

    llreve_process = Popen(command, stdout=PIPE, stderr=stderr)

    z3_process = Popen(["z3", "fixedpoint.engine=duality", "-in"],
                       stdin=llreve_process.stdout,
                       stdout=PIPE,
                       stderr=stderr)
    # Z3 now owns the read end of the pipe; drop our copy so that llreve
    # receives SIGPIPE if Z3 exits first.
    llreve_process.stdout.close()

    # Set timeout for both tools
    timer = Timer(timeout, _kill, [[llreve_process, z3_process]])
    try:
        timer.start()

        # communicate() drains stdout while waiting, so Z3 cannot block on a
        # full pipe (which a bare wait() with stdout=PIPE could cause).
        z3_output, _ = z3_process.communicate()
        result_kind = Result.Kind.ERROR
        # Processing the output
        for line in z3_output.splitlines():
            line = line.strip()
            if line == b"sat":
                result_kind = Result.Kind.NOT_EQUAL
            elif line == b"unsat":
                result_kind = Result.Kind.EQUAL
            elif line == b"unknown":
                result_kind = Result.Kind.UNKNOWN

        if z3_process.returncode != 0:
            result_kind = Result.Kind.ERROR
    finally:
        # A timer that is no longer alive has already fired, i.e. the tools
        # were killed because of the timeout.
        if not timer.is_alive():
            result_kind = Result.Kind.TIMEOUT
        timer.cancel()
        # Z3 is done, so the output of llreve is no longer needed: make sure
        # it is stopped and reaped so that no zombie process is left behind.
        if llreve_process.poll() is None:
            llreve_process.kill()
        llreve_process.wait()

    return Result(result_kind, first, second)
def functions_diff(mod_first,
                   mod_second,
                   fun_first,
                   fun_second,
                   glob_var,
                   config,
                   prev_result_graph=None,
                   function_cache=None):
    """
    Compare two functions for equality.

    First, functions are simplified and compared for syntactic equality using
    the SimpLL tool. If they are not syntactically equal, SimpLL prints a list
    of functions that the syntactic equality depends on. These are then
    compared for semantic equality.
    :param mod_first: First LLVM module
    :param mod_second: Second LLVM module
    :param fun_first: Function from the first module to be compared
    :param fun_second: Function from the second module to be compared
    :param glob_var: Global variable whose effect on the functions to compare
    :param config: Configuration
    :param prev_result_graph: Graph generated by the previous comparison (used
    to pass already known results to be used in this comparison).
    :param function_cache: Cache for SimpLL containing all functions
    present in the current graph (passed to this function to be updated with
    the results of the comparison).
    :returns: Result of the comparison. Its inner results describe the
    individual differing objects; its kind is set to EQUAL_SYNTAX when there
    is nothing to compare and to ERROR when a ValueError or a SimpLLException
    is raised. The graph attribute holds the resulting comparison graph (or
    the previous one if no new graph was produced).
    """
    result = Result(Result.Kind.NONE, fun_first, fun_second)
    curr_result_graph = None
    try:
        if config.verbosity:
            if fun_first == fun_second:
                fun_str = fun_first
            else:
                fun_str = "{} and {}".format(fun_first, fun_second)
            print("Syntactic diff of {} (in {})".format(
                fun_str, mod_first.llvm))

        # The simplification is repeated for as long as new definitions get
        # linked into the modules.
        simplify = True
        while simplify:
            simplify = False
            if (prev_result_graph and fun_first in prev_result_graph.vertices
                    and (prev_result_graph.vertices[fun_first].result !=
                         Result.Kind.ASSUMED_EQUAL)):
                # The function already has a result in the graph from the
                # previous comparison: reuse it instead of running SimpLL.
                first_simpl = ""
                second_simpl = ""
                curr_result_graph = prev_result_graph
            else:
                # Simplify modules and get the output graph.
                first_simpl, second_simpl, curr_result_graph, missing_defs = \
                    run_simpll(first=mod_first.llvm, second=mod_second.llvm,
                               fun_first=fun_first, fun_second=fun_second,
                               var=glob_var.name if glob_var else None,
                               suffix=glob_var.name if glob_var else "simpl",
                               cache_dir=function_cache.directory
                               if function_cache else None,
                               control_flow_only=config.control_flow_only,
                               output_llvm_ir=config.output_llvm_ir,
                               print_asm_diffs=config.print_asm_diffs,
                               verbose=config.verbosity,
                               use_ffi=config.use_ffi)
                if missing_defs:
                    # If there are missing function definitions, try to find
                    # their implementation, link them to the current modules,
                    # and rerun the simplification.
                    for fun_pair in missing_defs:
                        if "first" in fun_pair:
                            if _link_symbol_def(config.snapshot_first,
                                                mod_first, fun_pair["first"]):
                                simplify = True

                        if "second" in fun_pair:
                            if _link_symbol_def(config.snapshot_second,
                                                mod_second,
                                                fun_pair["second"]):
                                simplify = True
                if prev_result_graph and not simplify:
                    # Note: "curr_result_graph" is here the partial result
                    # graph, i.e. can contain unknown results that are known in
                    # the graph from the previous comparison.
                    prev_result_graph.absorb_graph(curr_result_graph)
                    curr_result_graph = prev_result_graph

                    # Add the newly received results to the ignored functions
                    # file.
                    # Note: there won't be any duplicates, since all functions
                    # that were in the cache before will be marked as unknown.
                    if function_cache:
                        function_cache.update([
                            v for v in curr_result_graph.vertices.values()
                            if v.result not in
                            [Result.Kind.UNKNOWN, Result.Kind.ASSUMED_EQUAL]
                        ])

        objects_to_compare, syndiff_bodies_left, syndiff_bodies_right = \
            curr_result_graph.graph_to_fun_pair_list(fun_first, fun_second)

        mod_first.restore_unlinked_llvm()
        mod_second.restore_unlinked_llvm()

        if not objects_to_compare:
            result.kind = Result.Kind.EQUAL_SYNTAX
        else:
            # If the functions are not syntactically equal, objects_to_compare
            # contains a list of functions and macros that are different.
            for fun_pair in objects_to_compare:
                # The semantic diff tool works on functions only, hence it is
                # used for "function" entries. (The condition used to be
                # negated, which sent macro and type differences to the tool
                # and never the functions themselves.)
                if (fun_pair[0].diff_kind == "function"
                        and config.semdiff_tool is not None):
                    # If a semantic diff tool is set, use it for further
                    # comparison of non-equal functions
                    fun_result = functions_semdiff(first_simpl, second_simpl,
                                                   fun_pair[0].name,
                                                   fun_pair[1].name, config)
                else:
                    fun_result = Result(fun_pair[2], fun_first, fun_second)
                fun_result.first = fun_pair[0]
                fun_result.second = fun_pair[1]
                if fun_result.kind == Result.Kind.NOT_EQUAL:
                    if fun_result.first.diff_kind in ["function", "type"]:
                        # Get the syntactic diff of functions or types
                        fun_result.diff = syntax_diff(
                            fun_result.first.filename,
                            fun_result.second.filename, fun_result.first.name,
                            fun_result.first.diff_kind, fun_pair[0].line,
                            fun_pair[1].line)
                    elif fun_result.first.diff_kind == "syntactic":
                        # Find the syntax differences and append the left and
                        # right value to create the resulting diff
                        fun_result.diff = "  {}\n\n  {}\n".format(
                            syndiff_bodies_left[fun_result.first.name],
                            syndiff_bodies_right[fun_result.second.name])
                    else:
                        sys.stderr.write(
                            "warning: unknown diff kind: {}\n".format(
                                fun_result.first.diff_kind))
                        fun_result.diff = "unknown\n"
                result.add_inner(fun_result)
        if config.verbosity:
            print("  {}".format(result))
    except ValueError:
        result.kind = Result.Kind.ERROR
    except SimpLLException as e:
        if config.verbosity:
            print(e)
        result.kind = Result.Kind.ERROR
    # Fall back to the previous graph if this comparison produced none.
    result.graph = (curr_result_graph
                    if curr_result_graph else prev_result_graph)
    return result
Beispiel #5
0
def diff_all_modules_using_global(glob_first, glob_second, config):
    """
    Compare semantics of all modules using the given global variable.

    The sources using the variable are looked up in both source trees of the
    configuration and every source of the first tree is processed: a source
    not reported for the second tree yields a NOT_EQUAL inner result, a
    source that fails to build yields an ERROR inner result, and otherwise
    the inner results of modules_diff are collected (only if both modules
    contain the global).
    :param glob_first: First global to compare
    :param glob_second: Second global to compare
    :param config: Configuration
    :returns: Result collecting the inner results; its kind is NOT_EQUAL if
              the two globals have different names.
    """
    name_first, name_second = glob_first.name, glob_second.name
    result = Result(Result.Kind.NONE, name_first, name_second)

    # Variables with different names are treated as unequal
    if name_first != name_second:
        result.kind = Result.Kind.NOT_EQUAL
        return result

    sources_first = config.source_first.find_srcs_using_symbol(name_first)
    sources_second = config.source_second.find_srcs_using_symbol(name_second)

    # Compare all sources containing functions using the variable
    for src in sources_first:
        if src not in sources_second:
            result.add_inner(Result(Result.Kind.NOT_EQUAL, src, src))
            continue

        try:
            module_first = config.source_first.get_module_from_source(src)
            module_second = config.source_second.get_module_from_source(src)
            module_first.parse_module()
            module_second.parse_module()

            both_have_global = (module_first.has_global(name_first)
                                and module_second.has_global(name_second))
            if not both_have_global:
                continue

            src_result = modules_diff(mod_first=module_first,
                                      mod_second=module_second,
                                      glob_var=glob_first,
                                      fun=None,
                                      config=config)
            for inner_result in src_result.inner.values():
                result.add_inner(inner_result)
        except BuildException as e:
            if config.verbosity:
                print(e)
            result.add_inner(Result(Result.Kind.ERROR, src, src))

    return result
Beispiel #6
0
def compare(args):
    """
    Compare the functions listed in two kernel snapshots.

    The function lists are loaded from the "functions.yaml" file of each
    snapshot directory. Every function of the old list that is present in
    both its old and new module is compared using functions_diff; errors and
    unknown results are printed directly, non-equal functions are reported
    through print_syntax_diff.

    :param args: Parsed command-line arguments. Attributes read here:
                 snapshot_dir_old, snapshot_dir_new, function, show_diff,
                 control_flow_only, verbose, semdiff_tool, regex_filter,
                 report_stat, show_errors.
    :returns: 0
    """
    old_functions = FunctionList(args.snapshot_dir_old)
    with open(os.path.join(args.snapshot_dir_old, "functions.yaml"),
              "r") as fun_list_yaml:
        old_functions.from_yaml(fun_list_yaml.read())
    old_source = KernelSource(args.snapshot_dir_old)
    new_functions = FunctionList(args.snapshot_dir_new)
    with open(os.path.join(args.snapshot_dir_new, "functions.yaml"),
              "r") as fun_list_yaml:
        new_functions.from_yaml(fun_list_yaml.read())
    new_source = KernelSource(args.snapshot_dir_new)

    if args.function:
        old_functions.filter([args.function])
        new_functions.filter([args.function])

    config = Config(old_source, new_source, args.show_diff,
                    args.control_flow_only, args.verbose,
                    args.semdiff_tool)
    # The overall result is labelled with the old and the new snapshot
    # (the new snapshot directory was previously never used here).
    result = Result(Result.Kind.NONE, args.snapshot_dir_old,
                    args.snapshot_dir_new)

    # The filter regex does not change between functions: compile it once.
    pattern = (re.compile(args.regex_filter)
               if args.regex_filter is not None else None)

    for fun, old_mod in sorted(old_functions.functions.items()):
        new_mod = new_functions.get_by_name(fun)
        # Skip functions for which the new snapshot yields no module instead
        # of failing on the attribute access below.
        if new_mod is None:
            continue
        if not (old_mod.has_function(fun) and new_mod.has_function(fun)):
            continue

        fun_result = functions_diff(
            mod_first=old_mod, mod_second=new_mod,
            fun_first=fun, fun_second=fun,
            glob_var=None, config=config)

        if fun_result is not None:
            if pattern is not None:
                # Filter results by regex: if no inner diff matches the
                # pattern, the function is reported as equal.
                for called_res in fun_result.inner.values():
                    if pattern.search(called_res.diff):
                        break
                else:
                    fun_result.kind = Result.Kind.EQUAL_SYNTAX

            result.add_inner(fun_result)
            if fun_result.kind in [Result.Kind.ERROR, Result.Kind.UNKNOWN]:
                print("{}: {}".format(fun, str(fun_result.kind)))
            elif fun_result.kind == Result.Kind.NOT_EQUAL:
                print_syntax_diff(args.snapshot_dir_old,
                                  args.snapshot_dir_new,
                                  fun, fun_result, False,
                                  args.show_diff)

        # Clean LLVM modules (allow GC to collect the occupied memory)
        old_mod.clean_module()
        new_mod.clean_module()
        LlvmKernelModule.clean_all()

    if args.report_stat:
        print("")
        print("Statistics")
        print("----------")
        result.report_stat(args.show_errors)
    return 0
Beispiel #7
0
    def graph_to_fun_pair_list(self, fun_first, fun_second):
        """
        Convert the graph into a list of differing object pairs.

        Only vertices reachable from both compared functions are considered;
        vertices whose result is EQUAL or ASSUMED_EQUAL are left out. Each
        remaining vertex contributes one "function" entry and one entry per
        non-function difference attached to it.
        :param fun_first: Name of the compared function on the left side
        :param fun_second: Name of the compared function on the right side
        :returns: Triple (objects_to_compare, syndiff_bodies_left,
                  syndiff_bodies_right). objects_to_compare is a list of
                  tuples made of one Result.Entity per side followed by the
                  result kind; the two dictionaries map the names of syntax
                  differences to their bodies on the respective side.
        """
        left_side = ComparisonGraph.Side.LEFT

        def pick(side, left_value, right_value):
            # Choose the value that belongs to the given side.
            return left_value if side == left_side else right_value

        # Collect the vertices reachable from the compared functions together
        # with the maps used to reconstruct callstacks.
        reachable_left, backtrack_left = self.reachable_from(
            left_side, fun_first)
        reachable_right, backtrack_right = self.reachable_from(
            ComparisonGraph.Side.RIGHT, fun_second)

        # Differences in functions called in one module only are not
        # processed, hence the intersection.
        common_vertices = list(
            set(reachable_left).intersection(set(reachable_right)))

        objects_to_compare = []
        syndiff_bodies_left = dict()
        syndiff_bodies_right = dict()
        equal_kinds = [Result.Kind.EQUAL, Result.Kind.ASSUMED_EQUAL]

        for vertex in common_vertices:
            # Equal functions are not included into the result.
            if vertex.result in equal_kinds:
                continue

            # Function difference: one entity per side.
            fun_entities = []
            for side in ComparisonGraph.Side:
                base_fun = pick(side, fun_first, fun_second)
                if base_fun == vertex.names[side]:
                    # The vertex is the base function: no callstack.
                    callstack = None
                else:
                    # Turn the edges leading to the vertex into a readable
                    # callstack.
                    callstack = self._edge_callstack_to_string(
                        _get_callstack(
                            pick(side, backtrack_left, backtrack_right),
                            self[base_fun], vertex))
                # The function diff is marked as covered exactly when there
                # is a non-function difference referencing it.
                fun_entities.append(Result.Entity(
                    vertex.names[side],
                    vertex.files[side],
                    vertex.lines[side],
                    callstack,
                    "function",
                    covered=len(vertex.nonfun_diffs) != 0
                ))
            objects_to_compare.append((*fun_entities, vertex.result))

            # Non-function differences attached to the vertex.
            for nonfun_diff in vertex.nonfun_diffs:
                diff_entities = []
                for side in ComparisonGraph.Side:
                    # Convert the YAML callstack format to string.
                    callstack = self._yaml_callstack_to_string(
                        nonfun_diff.callstack[side])
                    # Prepend the callstack of the parent function unless the
                    # parent is the base function itself.
                    base_fun = pick(side, fun_first, fun_second)
                    if nonfun_diff.parent_fun != base_fun:
                        parent_callstack = self._edge_callstack_to_string(
                            _get_callstack(
                                pick(side, backtrack_left, backtrack_right),
                                self[base_fun], vertex))
                        callstack = parent_callstack + "\n" + callstack

                    if isinstance(nonfun_diff, ComparisonGraph.SyntaxDiff):
                        diff_entities.append(Result.Entity(
                            nonfun_diff.name,
                            None,
                            None,
                            callstack,
                            "syntactic",
                            False
                        ))
                        bodies = pick(side, syndiff_bodies_left,
                                      syndiff_bodies_right)
                        bodies[nonfun_diff.name] = nonfun_diff.body[side]
                    elif isinstance(nonfun_diff, ComparisonGraph.TypeDiff):
                        diff_entities.append(Result.Entity(
                            nonfun_diff.name,
                            nonfun_diff.file[side],
                            nonfun_diff.line[side],
                            callstack,
                            "type",
                            False
                        ))
                # Non-function differences are always of the non-equal type
                diff_entities.append(Result.Kind.NOT_EQUAL)
                objects_to_compare.append(tuple(diff_entities))

        return objects_to_compare, syndiff_bodies_left, syndiff_bodies_right
Beispiel #8
0
def simplify_modules_diff(first,
                          second,
                          fun_first,
                          fun_second,
                          var,
                          suffix=None,
                          control_flow_only=False,
                          verbose=False):
    """
    Simplify modules to ease their semantic difference. Uses the SimpLL tool.

    Runs SimpLL on the two modules, then runs "opt -deadargelim" on both
    output files and parses the YAML report printed by SimpLL.
    :param first: File with the first LLVM module
    :param second: File with the second LLVM module
    :param fun_first: Analysed function from the first module
    :param fun_second: Analysed function from the second module
    :param var: Analysed variable (passed to SimpLL via --var if set)
    :param suffix: Suffix of the output files; if not set, the input file
                   names are used as the output file names
    :param control_flow_only: Pass --control-flow to SimpLL
    :param verbose: Pass --verbose to SimpLL, print the command, and keep the
                    error output of opt
    :returns: Tuple (first_out, second_out, objects_to_compare, missing_defs,
              syndiff_defs): the two simplified modules, the list of pairs of
              Result.Entity built from the "diff-functions" entry of the
              report, and the "missing-defs" and "syndiff-defs" entries (None
              if absent or if the report is not valid YAML).
    :raises SimpLLException: if SimpLL or opt exits with a non-zero code
    """
    # Imported locally so the block does not rely on DEVNULL being part of the
    # module-level subprocess imports.
    from subprocess import DEVNULL

    # Discard the error output of opt unless running verbosely. DEVNULL avoids
    # opening (and leaking) a file object for the null device on every call.
    stderr = None if verbose else DEVNULL

    first_out_name = add_suffix(first, suffix) if suffix else first
    second_out_name = add_suffix(second, suffix) if suffix else second

    try:
        simpll_command = [
            "build/diffkemp/simpll/simpll", first, second, "--print-callstacks"
        ]
        # Main (analysed) functions
        simpll_command.append("--fun")
        if fun_first != fun_second:
            simpll_command.append("{},{}".format(fun_first, fun_second))
        else:
            simpll_command.append(fun_first)
        # Analysed variable
        if var:
            simpll_command.extend(["--var", var])
        # Suffix for output files
        if suffix:
            simpll_command.extend(["--suffix", suffix])

        if control_flow_only:
            simpll_command.append("--control-flow")

        if verbose:
            simpll_command.append("--verbose")
            print(" ".join(simpll_command))

        simpll_out = check_output(simpll_command)
        # Remove dead arguments in both simplified modules (in place).
        for out_name in (first_out_name, second_out_name):
            check_call(
                ["opt", "-S", "-deadargelim", "-o", out_name, out_name],
                stderr=stderr)

        first_out = LlvmKernelModule(first_out_name)
        second_out = LlvmKernelModule(second_out_name)

        objects_to_compare = []
        missing_defs = None
        syndiff_defs = None
        try:
            simpll_result = yaml.safe_load(simpll_out)
            if simpll_result is not None:
                if "diff-functions" in simpll_result:
                    for fun_pair_yaml in simpll_result["diff-functions"]:
                        fun_pair = [
                            Result.Entity(
                                fun["function"],
                                fun.get("file", ""),
                                fun.get("line"),
                                "\n".join([
                                    "{} at {}:{}".format(
                                        call["function"], call["file"],
                                        call["line"])
                                    for call in fun["callstack"]
                                ]) if "callstack" in fun else "",
                                fun["is-syn-diff"], fun["covered-by-syn-diff"])
                            for fun in
                            [fun_pair_yaml["first"], fun_pair_yaml["second"]]
                        ]

                        objects_to_compare.append(tuple(fun_pair))
                missing_defs = simpll_result["missing-defs"] \
                    if "missing-defs" in simpll_result else None
                syndiff_defs = simpll_result["syndiff-defs"] \
                    if "syndiff-defs" in simpll_result else None
        except yaml.YAMLError:
            # Best effort: an unparsable report is treated as an empty one
            # (whatever was collected before the error is returned).
            pass

        return first_out, second_out, objects_to_compare, missing_defs, \
            syndiff_defs
    except CalledProcessError:
        raise SimpLLException("Simplifying files failed")