Python isAnchor Examples

Programming Language: Python

Namespace/Package Name: config.anchor

Method/Function: isAnchor

Examples at hotexamples.com: 2

Python isAnchor - 2 examples found. These are the top-rated real-world Python examples of config.anchor.isAnchor extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: matching_engine.py Project: trichimtrich/Karta

    def loadAndMatchAnchors(self, anchors_config, manual_anchors_config):
        """Load the list of anchor functions, and try to match them with the binary.

        The work is done in several phases:
            1. Pre-scan: collect the clues (strings / consts) of every anchor candidate,
               dropping candidates that anchor.isAnchor() rejects.
            2. Full scan: search the binary for the clues of each anchor, and use every
               unique match to narrow down the search range for the next anchors.
            3. Manual anchors: declare the (valid, non-contradicting) user defined matches.
            4. Revive: re-check the anchors that had several candidates, using the final
               range of matched anchors.
            5. Wrap-up: build the binary anchor list, sort the source anchors by their
               binary order, sanity check the file order, and order the source file names.

        Note:
            anchors_config is stored as self._src_anchor_list *by reference*, and is then
            modified in place (entries are removed, appended and sorted).

        Args:
            anchors_config (list): list of anchor src indices
            manual_anchors_config (list): list of user defined matches (Manual Anchors): (src index, bin_ea)

        Raises:
            KartaException: if not even a single anchor was matched, or if the matched
                anchors are tangled between different source files
        """
        # Parse the anchors file
        self.logger.info("Loading the list of Anchor functions")
        self._src_anchor_list = anchors_config

        # Locate the anchor functions
        self.logger.info("Searching for the Anchor functions in the binary")
        self.logger.addIndent()
        all_bin_functions = self.disas.functions()
        # range narrowing variables
        # *_match_*  - lowest / highest matched anchor so far (ea and index in all_bin_functions)
        # *_border_* - widest range in which the remaining functions are still searched for
        lower_match_ea = None
        upper_match_ea = None
        lower_match_index = None
        upper_match_index = None
        lower_border_ea = 0
        upper_border_ea = 2**64 - 1
        lower_border_index = None
        upper_border_index = None
        # lazily built list of (function_ea, const set) pairs, used by the efficient const search
        function_range = None
        overall_num_functions = len(self._src_functions_list)
        # anchors with more than one candidate: (src index, candidates), re-checked at the end
        multiple_option_candidates = []
        anchor_eas = []
        first_const_anchor = True
        efficient_const_search = False
        # pre-scan (for optimization reasons)
        anchor_stats = []
        num_const_clues = 0
        all_const_clues = set()
        all_string_clues = set()
        seen_strings, seen_consts, function_list = getContextsStats()
        # iterate over a copy, since failed candidates are removed from the original list
        for src_anchor_index in list(self._src_anchor_list):
            src_func_ctx = self.src_functions_ctx[src_anchor_index]
            is_str, threshold, anchor_clues = anchor.isAnchor(
                src_func_ctx, seen_strings, seen_consts, function_list,
                self.logger)
            # sanity check
            if anchor_clues is None:
                self._src_anchor_list.remove(src_anchor_index)
                self.logger.warning(
                    "Anchor candidate %s (%d) failed as an anchor function",
                    src_func_ctx.name, src_anchor_index)
                continue
            anchor_stats.append((src_anchor_index, src_func_ctx, is_str,
                                 threshold, anchor_clues))
            if is_str:
                all_string_clues = all_string_clues.union(anchor_clues)
            else:
                num_const_clues += len(anchor_clues)
                all_const_clues = all_const_clues.union(anchor_clues)

        # Traverse all of the strings only once, it is heavy
        # maps: string clue => list of the matching binary string contexts
        anchor_bin_strs = defaultdict(list)
        # Scanning the entire string list and checking against each anchor string - O(kN) - efficient in memory
        if len(all_string_clues) > 0:
            for bin_str_ctx in self.disas.strings():
                bin_str = str(bin_str_ctx)
                if bin_str in all_string_clues:
                    anchor_bin_strs[bin_str].append(bin_str_ctx)

        # full scan (maybe only string scan)
        for src_anchor_index, src_func_ctx, is_str, threshold, anchor_clues in anchor_stats:
            candidates = None
            # one set of candidate functions (start eas) per clue that was found in the binary
            candidate_sets = []
            # scan the full clue list
            for clue_idx, clue in enumerate(anchor_clues):
                # strings
                if is_str:
                    current_set = set()
                    # found the string clue in the binary
                    if clue in anchor_bin_strs:
                        for bin_str in anchor_bin_strs[clue]:
                            for ref in self.disas.drefsTo(bin_str.ea):
                                caller_func = self.disas.funcAt(ref)
                                if caller_func is None:
                                    continue
                                callar_func_start = self.disas.funcStart(
                                    caller_func)
                                # only functions inside the current search borders are relevant
                                if lower_border_ea <= callar_func_start and callar_func_start <= upper_border_ea:
                                    current_set.add(callar_func_start)
                # consts
                else:
                    # measure some times (for the first one only)
                    if first_const_anchor:
                        start_time = time.time()
                    # scanning the entire firmware per anchor const - O(kN)
                    current_set = set()
                    # search for it in the binary (non efficient)
                    if lower_match_index is None or not efficient_const_search:
                        # the first const search is always over the full address range (used for the time measurement)
                        search_start = lower_border_ea if not first_const_anchor else 0
                        search_end = upper_border_ea if not first_const_anchor else (
                            2**64 - 1)
                        # start our search
                        for match_ea in self.disas.findImmediate(
                                search_start, search_end, clue):
                            # Filter out matches that are not inside functions
                            caller_func = self.disas.funcAt(match_ea)
                            if caller_func is not None:
                                current_set.add(
                                    self.disas.funcStart(caller_func))
                        # measure the end time too
                        if first_const_anchor:
                            end_time = time.time()
                            # estimate the overall time, assuming every const clue costs the same as the first one
                            overall_search_time = (
                                end_time - start_time) * num_const_clues
                            if lower_match_index is None:
                                efficient_const_search = anchor.MAXIMAL_CONST_SEARCH_TIME <= overall_search_time
                            else:
                                efficient_const_search = anchor.MAXIMAL_CONST_SEARCH_RATE <= overall_search_time * 1.0 / (
                                    upper_match_index - lower_match_index + 1)
                            # no longer the first const
                            first_const_anchor = False
                    # efficient search
                    else:
                        if function_range is None:
                            self.logger.info(
                                "Anchor search - switching to efficient const search mode"
                            )
                            # build the fast mapping, and then continue as before
                            # NOTE(review): lower_border_index is computed without clamping (unlike lower_border_ea),
                            # so it may be negative here, in which case the slice starts from the end of the
                            # list - TODO confirm that this is the intended range
                            function_range = []
                            for function_ea in all_bin_functions[
                                    lower_border_index:upper_border_index]:
                                function_range.append(
                                    (function_ea,
                                     self.disas.locateAnchorConsts(
                                         function_ea, all_const_clues)))
                        # Now actually search for the wanted const value in the result sets
                        for function_ea, const_set in function_range:
                            if clue in const_set:
                                current_set.add(function_ea)

                # Same merging logic, for strings and consts
                # simply add this option (only if relevant)
                if len(current_set) > 0:
                    candidate_sets.append(current_set)
                # check if reached the limit
                if len(candidate_sets) >= threshold:
                    # start checking for a match
                    # count in how many clue sets each candidate function appeared
                    candidate_attempt = defaultdict(int)
                    for candidate_set in candidate_sets:
                        for candidate in candidate_set:
                            candidate_attempt[candidate] += 1
                    # candidates that already passed the threshold
                    candidates = list(
                        filter(lambda x: candidate_attempt[x] >= threshold,
                               candidate_attempt.keys()))
                    # candidates whose count already covers the threshold minus the number of clues left to scan
                    future_candidates = list(
                        filter(
                            lambda x: candidate_attempt[x] >= threshold -
                            (len(anchor_clues) - (clue_idx + 1)),
                            candidate_attempt.keys()))
                    # stop condition
                    if len(candidates) == 1 and len(future_candidates) == 0:
                        break

            # check if needs to decide between multiple options
            if candidates is not None and len(candidates) > 1:
                sorted_candidates = list(candidate_attempt.keys())
                sorted_candidates.sort(key=lambda x: candidate_attempt[x],
                                       reverse=True)
                # if we have an absolute winner, then pick it (safe to access both cells because len() > 1)
                if candidate_attempt[sorted_candidates[0]] > candidate_attempt[
                        sorted_candidates[1]]:
                    candidates = [sorted_candidates[0]]

            # check if we have any candidate left
            if candidates is None or len(candidates) == 0:
                self.logger.warning(
                    "Anchor function - %s: Failed to find a match",
                    self._src_functions_list[src_anchor_index])
                self._src_anchor_list.remove(src_anchor_index)
            elif len(candidates) == 1:
                caller_func = self.disas.funcAt(candidates.pop())
                caller_func_start = self.disas.funcStart(caller_func)
                self.logger.info("Anchor function - %s: Matched at 0x%x (%s)",
                                 self._src_functions_list[src_anchor_index],
                                 caller_func_start,
                                 self.disas.funcName(caller_func))
                self._matched_anchors_ea[src_anchor_index] = caller_func_start
                anchor_eas.append(caller_func_start)
                self.declareMatch(src_anchor_index, caller_func_start,
                                  REASON_ANCHOR)
                # use the match to improve our search range
                # first anchor
                if len(self._matched_anchors_ea.keys()) == 1:
                    lower_match_ea = caller_func_start
                    upper_match_ea = lower_match_ea
                    lower_match_index = all_bin_functions.index(
                        caller_func_start)
                    upper_match_index = lower_match_index
                    change = True
                else:
                    # try to improve the lower border
                    if caller_func_start < lower_match_ea:
                        lower_match_ea = caller_func_start
                        new_lower_index = all_bin_functions.index(
                            caller_func_start)
                        # NOTE(review): presumably new_lower_index < lower_match_index here, making the slice
                        # start negative (keeps only the tail of function_range) - TODO confirm the intent
                        if function_range is not None:
                            function_range = function_range[new_lower_index -
                                                            lower_match_index:]
                        lower_match_index = new_lower_index
                        change = True
                    # try to improve the upper border
                    elif upper_match_ea < caller_func_start:
                        upper_match_ea = caller_func_start
                        new_upper_index = all_bin_functions.index(
                            caller_func_start)
                        # NOTE(review): keeps only the first (new_upper_index - upper_match_index) entries of
                        # function_range - TODO confirm the intent
                        if function_range is not None:
                            function_range = function_range[:new_upper_index -
                                                            upper_match_index]
                        upper_match_index = new_upper_index
                        change = True
                    else:
                        change = False
                # adjust the borders accordingly
                if change:
                    # number of binary functions between (and including) the lowest and highest matched anchors
                    locked_gap = upper_match_index - lower_match_index + 1
                    # the index borders are not clamped to the list limits, only the ea borders are
                    lower_border_index = lower_match_index - (
                        overall_num_functions - locked_gap)
                    upper_border_index = upper_match_index + (
                        overall_num_functions - locked_gap)
                    lower_border_ea = all_bin_functions[max(
                        lower_match_index -
                        (overall_num_functions - locked_gap), 0)]
                    upper_border_ea = all_bin_functions[min(
                        upper_match_index +
                        (overall_num_functions - locked_gap),
                        len(all_bin_functions) - 1)]
            else:
                self.logger.warning(
                    "Anchor function - %s: Found several matches (%d), will check it again later",
                    self._src_functions_list[src_anchor_index],
                    len(candidates))
                multiple_option_candidates.append(
                    (src_anchor_index, candidates))
        self.logger.removeIndent()

        # good time to match the user declared functions
        for src_index, bin_ea in manual_anchors_config:
            # check for user errors
            func_ctx = self.disas.funcAt(bin_ea)
            if func_ctx is None or self.disas.funcStart(func_ctx) != bin_ea:
                self.logger.warning(
                    "User defined anchor function %s should be matched to a *start* of a function, not to 0x%x (%s)",
                    self._src_functions_list[src_index], bin_ea,
                    self.disas.funcNameEA(bin_ea))
                continue
            # check for duplicates
            if src_index in self._matched_anchors_ea:
                # contradiction
                if bin_ea != self._matched_anchors_ea[src_index]:
                    actual_ea = self._matched_anchors_ea[src_index]
                    self.logger.warning(
                        "User defined anchor function %s contradicts match at 0x%x (%s), ignoring user definition",
                        self._src_functions_list[src_index], actual_ea,
                        self.disas.funcNameEA(actual_ea))
                    continue
                # duplicate
                else:
                    continue
            # duplicate at this point could only be a contradiction
            if bin_ea in anchor_eas and src_index not in self._matched_anchors_ea:
                self.logger.warning(
                    "User defined anchor function %s contradicts match at 0x%x (%s), ignoring user definition",
                    self._src_functions_list[src_index], bin_ea,
                    self.disas.funcNameEA(bin_ea))
                continue
            # can now safely declare this match
            self.logger.info(
                "User defined anchor function - %s: Matched at 0x%x (%s)",
                self._src_functions_list[src_index], bin_ea,
                self.disas.funcNameEA(bin_ea))
            self._matched_anchors_ea[src_index] = bin_ea
            anchor_eas.append(bin_ea)
            self._src_anchor_list.append(src_index)
            self.declareMatch(src_index, bin_ea, REASON_MANUAL_ANCHOR)
            # use the match to improve our search range
            # first anchor
            if len(self._matched_anchors_ea.keys()) == 1:
                lower_match_ea = bin_ea
                upper_match_ea = lower_match_ea
                lower_match_index = all_bin_functions.index(bin_ea)
                upper_match_index = lower_match_index
                change = True
            else:
                # try to improve the lower border
                if bin_ea < lower_match_ea:
                    lower_match_ea = bin_ea
                    new_lower_index = all_bin_functions.index(bin_ea)
                    if function_range is not None:
                        function_range = function_range[new_lower_index -
                                                        lower_match_index:]
                    lower_match_index = new_lower_index
                    change = True
                # try to improve the upper border
                elif upper_match_ea < bin_ea:
                    upper_match_ea = bin_ea
                    new_upper_index = all_bin_functions.index(bin_ea)
                    if function_range is not None:
                        function_range = function_range[:new_upper_index -
                                                        upper_match_index]
                    upper_match_index = new_upper_index
                    change = True
                else:
                    change = False
            # adjust the borders accordingly
            if change:
                locked_gap = upper_match_index - lower_match_index + 1
                lower_border_index = lower_match_index - (
                    overall_num_functions - locked_gap)
                upper_border_index = upper_match_index + (
                    overall_num_functions - locked_gap)
                lower_border_ea = all_bin_functions[max(
                    lower_match_index - (overall_num_functions - locked_gap),
                    0)]
                upper_border_ea = all_bin_functions[min(
                    upper_match_index + (overall_num_functions - locked_gap),
                    len(all_bin_functions) - 1)]

        # double check the candidates which had multiple options (if narrowed the search space)
        if lower_match_ea is not None:
            for src_anchor_index, candidates in multiple_option_candidates:
                # check if the manual definitions already defined this one
                if src_anchor_index in self._matched_anchors_ea:
                    continue
                # keep only the candidates that reside between the lowest and the highest matched anchors
                filterred_candidates = list(
                    filter(
                        lambda x: lower_match_ea <= x and x <= upper_match_ea,
                        candidates))
                # matched
                if len(filterred_candidates) == 1:
                    bin_ea = filterred_candidates.pop()
                    # the binary function was already taken by another anchor
                    if bin_ea in anchor_eas:
                        self.logger.warning(
                            "User defined anchor function at 0x%x (%s), blocked revived anchor: %s, dropped the anchor",
                            bin_ea, self.disas.funcNameEA(bin_ea),
                            self._src_functions_list[src_anchor_index])
                        self._src_anchor_list.remove(src_anchor_index)
                        continue
                    caller_func = self.disas.funcAt(bin_ea)
                    caller_func_start = self.disas.funcStart(caller_func)
                    self.logger.info(
                        "Anchor function (revived) - %s: Matched at 0x%x (%s)",
                        self._src_functions_list[src_anchor_index],
                        caller_func_start, self.disas.funcName(caller_func))
                    self._matched_anchors_ea[
                        src_anchor_index] = caller_func_start
                    anchor_eas.append(caller_func_start)
                    self.declareMatch(src_anchor_index, caller_func_start,
                                      REASON_ANCHOR)
                # still not found
                else:
                    self._src_anchor_list.remove(src_anchor_index)

        # make sure we found at least one anchor function
        if len(self._src_anchor_list) == 0:
            self.logger.error("Failed to match even a single Anchor function")
            raise KartaException

        # Create a binary anchor list for future use
        # (holds the index in all_bin_functions of the match of each source anchor)
        self._bin_anchor_list = []
        for src_anchor_index in self._src_anchor_list:
            self._bin_anchor_list.append(
                all_bin_functions.index(
                    self.function_matches[src_anchor_index]))

        # Sort the file list according to the (bin) order of the anchors
        # (the copy is needed because the key looks up the pre-sort position of each anchor)
        old_anchor_list = list(self._src_anchor_list)
        self._src_anchor_list.sort(
            key=lambda x: self._bin_anchor_list[old_anchor_list.index(x)])

        # Sanity Check: make sure that the files are not mixed up
        anchor_files = []
        # despite its name, "started" is True only for the first iteration (anchor_files is still empty)
        started = True
        for src_anchor_index in self._src_anchor_list:
            # moved to a different file than the previous anchor: it must be a file we haven't seen yet
            if not started and self.src_functions_ctx[
                    src_anchor_index].file != anchor_files[-1]:
                if self.src_functions_ctx[
                        src_anchor_index].file in anchor_files:
                    self.logger.error(
                        "Sanity check failed: the matched anchor functions are tangled between files..."
                    )
                    raise KartaException
            if self.src_functions_ctx[
                    src_anchor_index].file not in anchor_files:
                anchor_files.append(
                    self.src_functions_ctx[src_anchor_index].file)
            started = False

        # remove empty files (weird edge case)
        self._src_file_names = list(
            filter(lambda x: len(self._src_file_mappings[x]) != 0,
                   self._src_file_mappings.keys()))
        removed_names = list(
            filter(lambda x: len(self._src_file_mappings[x]) == 0,
                   self._src_file_mappings.keys()))
        for name in removed_names:
            self._src_file_mappings.pop(name)

        # Now sort the src file names list according to the sorted anchors
        # (files with anchors come first, in anchor order; the rest follow in set order)
        self._src_file_names = anchor_files + list(
            set(self._src_file_names).difference(anchor_files))

Example #2

Show file

def analyzeLibrary(config_name, bin_dirs, compiled_ars, prompter):
    """Analyze the open source library, file-by-file and merge the results.

    Each compiled file is analyzed using analyzeFile(), its generated JSON state
    file is loaded and passed to parseFileStats(), the cross-file references are
    resolved, the anchor candidates are identified using anchor.isAnchor(), and
    the merged result is written as a JSON file to config_name.

    Note:
        * Returns early (without generating the config) if the disassembler is IDA
          and the script's path / one of the file paths contains a space.
        * Exits the process with code 1 if the state file of an analyzed file can't be opened.
        * Exits the process with code 2 if the archive yielded no files and the user
          declined a directory scan, or if all of the files were empty.

    Args:
        config_name (str): name of the final JSON config file
        bin_dirs (list): list of paths to the binary folders containing the compiled *.o files
        compiled_ars (list): list of paths to the compiled *.ar files
        prompter (prompter): prompter instance
    """
    prompter.info("Starting to analyze the library")
    prompter.addIndent()
    ignore_archive = len(compiled_ars) == 0
    finished_scan = False

    # workaround the enumerate in the next loop
    if ignore_archive:
        compiled_ars = range(len(bin_dirs))

    # ida has severe bugs, make sure to warn the user in advance
    if disas_cmd.name() == "IDA" and ' ' in SCRIPT_PATH:
        prompter.error(
            "IDA does not support spaces (' ') in the script's path. Please move %s's directory accordingly (I feel your pain)",
            (LIBRARY_NAME))
        prompter.removeIndent()
        return

    # We could have 2 iteration rounds here
    # (the second round ignores the archive, and scans the bin directory instead)
    while not finished_scan:
        # Prepare & load the stats from each file
        for index, compiled_ar in enumerate(compiled_ars):
            # check if this is a windows archive
            is_windows = isWindows()
            bin_dir = bin_dirs[index]
            bin_suffix = "o" if not is_windows else "obj"
            if not ignore_archive:
                prompter.info(
                    "Analyzing each of the files in the archive - %s",
                    compiled_ar)
            else:
                prompter.info(
                    "Analyzing each of the *.%s files in the bin directory" %
                    (bin_suffix))
            prompter.addIndent()
            archive_files = list(
                locateFiles(
                    bin_dir,
                    filter(lambda x: x.endswith("." + bin_suffix),
                           getArchiveFiles(compiled_ar))
                    if not ignore_archive else None, bin_suffix))
            # check if we need a progress bar
            if len(
                    archive_files
            ) >= PROGRESS_BAR_THRESHOLD and prompter._min_level > logging.DEBUG:
                progress_bar = ProgressBar(
                    'Analyzed %d/%d files - %d%% Completed',
                    len(archive_files),
                    20,
                    True,
                    time_format="Elapsed %M:%S -")
                progress_bar.start()
            else:
                progress_bar = None
            # start the work itself
            for full_file_path, compiled_file in archive_files:
                # ida has severe bugs, make sure to warn the user in advance
                if disas_cmd.name() == "IDA" and ' ' in full_file_path:
                    prompter.error(
                        "IDA does not support spaces (' ') in the file's path (in script mode). Please move the binary directory accordingly (I feel your pain)"
                    )
                    prompter.removeIndent()
                    return
                prompter.debug("%s - %s", full_file_path, compiled_file)
                if progress_bar is None:
                    prompter.info("%s - %s", compiled_file, full_file_path)
                # analyze the file
                analyzeFile(full_file_path, is_windows)
                # load the JSON data from it
                # (only the open() is guarded: a missing state file means that the analysis failed)
                try:
                    fd = open(full_file_path + STATE_FILE_SUFFIX, 'r')
                except IOError:
                    prompter.error(
                        "Failed to create the .JSON file for file: %s" %
                        (compiled_file))
                    prompter.error(
                        "Read the log file for more information: %s" %
                        (constructLogPath(full_file_path)))
                    prompter.removeIndent()
                    prompter.removeIndent()
                    prompter.error("Encountered an error, exiting")
                    exit(1)
                # all was OK, can continue
                # (the context manager closes the file even if the parsing raises)
                with fd:
                    parseFileStats(
                        full_file_path,
                        json.load(fd,
                                  object_pairs_hook=collections.OrderedDict))
                if progress_bar is not None:
                    progress_bar.advance(1)
            # wrap it up
            if progress_bar is not None:
                progress_bar.finish()
            prompter.removeIndent()

        # Resolve several unknowns refs as code refs
        prompter.info("Resolving cross-references between different files")
        resolveUnknowns()

        # check if we have any files in the list
        if len(src_file_mappings) == 0 and not ignore_archive:
            prompter.error("No files found in the archive :(")
            prompter.removeIndent()
            new_path = prompter.input(
                "Do you want to analyze all of the *.%s files in the bin directory? <Y/N>: "
                % (bin_suffix)).lower()
            if new_path != 'y':
                prompter.error("Finished with errors!")
                exit(2)
            # run again, and ignore the archive this time
            ignore_archive = True
            prompter.addIndent()
        else:
            finished_scan = True

    # Remove empty files
    prompter.info("Filtering out empty files")
    # collect the names first: popping from the dict while lazily iterating over it is an error
    empty_files = [
        name for name in src_file_mappings
        if len(src_file_mappings[name]) == 0
    ]
    for file_name in empty_files:
        src_file_mappings.pop(file_name)

    # Create the list of anchors
    str_anchors = []
    const_anchors = []
    anchors_list = []
    anchors_files = set()
    prompter.info("Identifying possible Anchor functions")
    prompter.addIndent()
    seen_strings, seen_consts, function_list = getContextsStats()
    for src_func_index, src_func_ctx in enumerate(src_functions_ctx):
        is_str, threshold, candidates = anchor.isAnchor(
            src_func_ctx, seen_strings, seen_consts, function_list, prompter)
        # not an anchor
        if candidates is None:
            continue
        if is_str:
            str_anchors.append(src_func_index)
        else:
            const_anchors.append(src_func_index)
        anchors_files.add(src_func_ctx.file)
    prompter.removeIndent()

    # strings before const, because they are faster to search for
    anchors_list = str_anchors + const_anchors

    # check if we have any files left
    if len(src_file_mappings) == 0:
        prompter.error("All files were empty :(")
        prompter.removeIndent()
        prompter.error("Finished with errors!")
        exit(2)

    # Check for an error
    if len(anchors_list) == 0:
        prompter.warning("Failed to find Anchor functions in the library :(")
        prompter.warning("You should define manual anchors instead")

    # Create the anchors file
    prompter.info("Generating the full JSON file: %s", config_name)
    prompter.addIndent()
    full_json = {}

    # Serialize the anchor list
    prompter.info("Writing the anchor list")
    full_json[JSON_TAG_ANCHORS] = anchors_list

    # Serialize the functions of each files
    prompter.info("Writing the function list for each of the files")
    file_dict = collections.OrderedDict()
    # find a common file prefix, and remove it from the file path
    # (only the first and the last file names are compared, component by component)
    if len(src_file_mappings) > 1:
        file_names = list(src_file_mappings)
        base_value = file_names[0].split(os.path.sep)
        comparison_value = file_names[-1].split(os.path.sep)
        for index in range(min(len(comparison_value), len(base_value))):
            if base_value[index] != comparison_value[index]:
                break
        # +1 for the path separator that follows the common prefix
        common_path_len = len(os.path.sep.join(base_value[:index])) + 1
    else:
        common_path_len = len(bin_dirs[0]) + 1

    for src_file_name in src_file_mappings:
        # a concrete list is needed, a lazy map object is not JSON serializable
        file_dict[src_file_name[common_path_len:]] = [
            func_ctx.serialize()
            for func_ctx in src_file_mappings[src_file_name]
        ]
    full_json[JSON_TAG_FILES] = file_dict

    # actually dump it
    with open(config_name, "w") as fd:
        json.dump(full_json, fd)
    prompter.removeIndent()

    prompter.info("Anchor to file ratio is: %d/%d", len(anchors_files),
                  len(src_file_mappings))
    prompter.info("Anchor to function ratio is: %d/%d", len(anchors_list),
                  len(src_functions_list))
    prompter.removeIndent()