Esempio n. 1
0
    def delCodePtr(self, src, dest):
        """Remove a code pointer (e.g. one that turned out to be a false positive).

        Args:
            src (int): effective address at which the pointer itself is stored
            dest (int): effective address of the code the pointer was assumed to reference
        """
        # Drop both the data and the code cross-references from the pointer to its target
        idc.del_dref(src, dest)
        idc.del_cref(src, dest, 0)
        # Delete whatever item is defined over the pointer-sized range at the pointer's location
        ptr_size = self.addressSize()
        ida_bytes.del_items(src, 0, ptr_size)
Esempio n. 2
0
    def locateDataPtrs(self, scs, sds):
        """Locate all data / code fptrs in the given set of segments.

        The search is performed in several stages:
            1. Scan the data segments, pointer by pointer, and classify every value as an
               approved function pointer, a function pointer candidate, or a data pointer.
            2. Slide a window over the seen pointers, and use the spacing pattern of the
               approved pointers to approve more candidates.
            3. Disqualify approved pointers whose destination has conflicting code types,
               or can't be turned into an instruction.
            4. Disqualify isolated function pointers (no other artifact in their locality).
            5. Mark the surviving pointers and record them in self._ref_ptrs.

        Args:
            scs (list): list of (sark) code segments
            sds (list): list of (sark) data segments
        """
        # cleaned code address => set of code types requested by the pointers to it
        local_ref_ptrs = defaultdict(set)
        # (pointer ea, is approved) for every valid code pointer, in scan order
        seen_list = []
        # (pointer ea, raw pointer value) for every approved function pointer
        approved_ptrs = []
        approved_eas = set()
        # cleaned code address => set of pointer eas that point at it
        ptrs_mappings = defaultdict(set)
        # (ea, is function pointer) for every marked artifact (fptrs, data ptrs and their targets)
        marked_artifacts = []

        # Stage 1: scan the data segments
        for sd in sds:
            cur_ea = pad(sd.start_ea, self._analyzer.data_fptr_alignment)
            while cur_ea < sd.end_ea:
                line = sark.Line(cur_ea)
                # strings are skipped as a whole
                if line.is_string:
                    cur_ea += pad(line.size,
                                  self._analyzer.data_fptr_alignment)
                    continue
                # check for a function ptr
                value = self._analyzer.parseAdderss(cur_ea)
                # make sure it is valid (enforces that the code_type is active)
                if self.isValidCodePtr(value, scs):
                    func_value = self._analyzer.cleanPtr(value)
                    code_type = self._analyzer.ptrCodeType(value)
                    # Decide if (and why) the pointer is approved. The checks are ordered
                    # from the cheapest to the most expensive one.
                    approval_msg = None
                    check_printable = False
                    # is seen
                    if func_value in local_ref_ptrs:
                        approval_msg = "Located a fptr from 0x%x to 0x%x (type: %d) - Undeclared function"
                        check_printable = True
                    # is start of real function, from the correct type
                    elif (self._analyzer.codeType(func_value) == code_type
                          and self._analyzer.func_classifier.isFuncStart(
                              func_value)):
                        approval_msg = "Located a fptr from 0x%x to 0x%x (type: %d) - Existing function"
                    # is start of function
                    elif self._analyzer.func_classifier.predictFunctionStartMixed(
                            func_value, known_type=code_type):
                        approval_msg = "Located a fptr from 0x%x to 0x%x (type: %d) - Undeclared function"
                        check_printable = True

                    if approval_msg is not None:
                        local_ref_ptrs[func_value].add(code_type)
                        ptrs_mappings[func_value].add(cur_ea)
                        self._analyzer.logger.debug(approval_msg, cur_ea,
                                                    func_value, code_type)
                        if check_printable and self.isPrintableAddress(value):
                            self._analyzer.logger.debug(
                                "Looks like a printable FP: 0x%x", value)
                        approved_ptrs.append((cur_ea, value))
                        approved_eas.add(cur_ea)
                        seen_list.append((cur_ea, True))
                        marked_artifacts.append((cur_ea, True))
                    # only a candidate - may be will be approved later
                    else:
                        seen_list.append((cur_ea, False))
                        # check for an analysis problem
                        if list(line.drefs_from):
                            idc.del_dref(cur_ea, value)
                            idc.del_dref(cur_ea, func_value)
                # Check for a valid data pointer
                elif self.isValidDataPtr(value, sds):
                    # make it a data pointer
                    self._analyzer.markDataPtr(cur_ea, value)
                    self._analyzer.logger.debug(
                        "Located a data ptr from 0x%x to 0x%x", cur_ea, value)
                    marked_artifacts.append((cur_ea, False))
                    marked_artifacts.append((value, False))
                # continue forward
                cur_ea += pad(self._analyzer.addressSize(),
                              self._analyzer.data_fptr_alignment)

        # Stage 2: check if there is some pattern we can use to find more fptrs
        chosen_threshold = 7
        cur_window = []
        window_index = 0
        # NOTE: this step is too risky if there are Read-Only data constants inside the text section
        while window_index < len(
                seen_list) and not self._analyzer.isCodeMixedWithData():
            # If we didn't reach the end, and
            # 1. The window doesn't have enough "True" pointers
            # 2. The windows contains only "True" pointers
            # Slide the window onward
            while window_index < len(seen_list):
                approved_count = sum(1 for entry in cur_window if entry[1])
                if (approved_count >= chosen_threshold
                        and approved_count != len(cur_window)):
                    break
                # If we are above the threshold (meaning that cond #2 applies), kick out the first ptr (which is a "True" ptr)
                if chosen_threshold < approved_count:
                    cur_window = cur_window[1:]
                # Add a new pointer at the end of our window
                cur_window.append(seen_list[window_index])
                window_index += 1
            chosen_window = [entry for entry in cur_window if entry[1]]
            candidates = [entry for entry in cur_window if not entry[1]]
            # Sanity check: check if we have a candidate
            if window_index == len(seen_list) and not candidates:
                break
            # NOTE(review): when the end of seen_list is reached, the window is checked even if it
            # holds fewer than chosen_threshold "True" pointers - confirm that this is intended.
            # deltas between the "True" pointers
            chosen_deltas = {
                after[0] - before[0]
                for before, after in zip(chosen_window, chosen_window[1:])
            }
            # All possible deltas between adjacent pointers
            seen_deltas = {
                after[0] - before[0]
                for before, after in zip(cur_window, cur_window[1:])
            }
            new_chosen = None
            # check for a pattern: the candidates don't add any new delta to the window
            if len(seen_deltas) <= len(chosen_deltas):
                new_chosen = candidates[0]
            # check if the window starts with a candidate, that is right near a "True" pointer
            elif not cur_window[0][1]:
                first_seen = cur_window[0]
                alignment = self._analyzer.data_fptr_alignment
                neighbours = (first_seen[0] - alignment,
                              first_seen[0] + alignment)
                if any(neighbour in approved_eas for neighbour in neighbours):
                    new_chosen = first_seen
            # check if found a match
            if new_chosen is not None:
                # re-insert ourselves with our new values
                cur_window[cur_window.index(new_chosen)] = (new_chosen[0],
                                                            True)
                # mark the pointer
                cur_ea = new_chosen[0]
                value = self._analyzer.parseAdderss(cur_ea)
                func_value = self._analyzer.cleanPtr(value)
                code_type = self._analyzer.ptrCodeType(value)
                local_ref_ptrs[func_value].add(code_type)
                ptrs_mappings[func_value].add(cur_ea)
                approved_ptrs.append((cur_ea, value))
                marked_artifacts.append((cur_ea, True))
                approved_eas.add(cur_ea)
                self._analyzer.logger.debug(
                    "Located new fptr from 0x%x to 0x%x (type: %d)", cur_ea,
                    func_value, code_type)
            # advance the window
            cur_window = cur_window[1:]

        # Stage 3: filter the pointers (we could have false positives)
        disqualified_addresses = set()
        for cur_ea, raw_address in approved_ptrs:
            fixed_address = self._analyzer.cleanPtr(raw_address)
            # check if already disqualified
            # NOTE(review): nothing in this method removes entries from the local ptrs_mappings
            # (only self._ptrs_mappings is popped below), so this check looks like it can never
            # trigger - verify which of the two mappings was meant.
            if fixed_address not in ptrs_mappings:
                continue
            seen_types = local_ref_ptrs[fixed_address]
            # Several code types for the same address, we take no chances and remove them all
            if len(seen_types) != 1:
                disqualified = True
                # Only needed for the log line below: the code type requested by this pointer.
                # (Previously the name was unbound, or stale from an earlier iteration.)
                wanted_code_type = self._analyzer.ptrCodeType(raw_address)
            # Check if the code type is even legal for that address
            else:
                wanted_code_type = next(iter(seen_types))
                orig_code_type = self._analyzer.codeType(fixed_address)
                # The same pointer-sized range is used for every step of the probe, so that
                # the restore covers exactly what the probe has changed.
                probe_size = self._analyzer.addressSize()
                ida_bytes.del_items(fixed_address, 0, probe_size)
                if orig_code_type != wanted_code_type:
                    self._analyzer.setCodeType(fixed_address,
                                               fixed_address + probe_size,
                                               wanted_code_type)
                # Failing to create an instruction means that the destination isn't code
                disqualified = idc.create_insn(fixed_address) == 0
                # Always clean after ourselves
                ida_bytes.del_items(fixed_address, 0, probe_size)
                if orig_code_type != wanted_code_type:
                    self._analyzer.setCodeType(fixed_address,
                                               fixed_address + probe_size,
                                               orig_code_type)
            # We are OK, can continue
            if not disqualified:
                continue
            # Found a false function pointer
            # Be cautious with the removals, we could have duplicates
            if fixed_address in self._ptrs_mappings:
                self._ptrs_mappings.pop(fixed_address)
            disqualified_addresses.add(raw_address)
            marked_artifacts.remove((cur_ea, True))
            # no need to remove from local_ref_ptrs, as the global variable only gets the approved values
            # no need to remove from approved_eas, as this data set isn't used anymore
            self._analyzer.logger.debug(
                "Disqualified (code) pointer 0x%08x from 0x%08x (type %d, seen types %s)",
                fixed_address, cur_ea, wanted_code_type, seen_types)

        # Stage 4: now filter them based on scoped range from other artifacts
        marked_artifacts.sort(key=lambda x: x[0])
        prev_artifact = None
        # Only check ourselves against the next in line (the last artifact is never checked)
        for (cur_ea, is_fptr), (next_ea, _) in zip(marked_artifacts,
                                                   marked_artifacts[1:]):
            # We are linking to the next element, so he is legit too
            if cur_ea + FPTR_LOCALITY_RANGE >= next_ea:
                prev_artifact = next_ea
                continue
            # Too far from the next artifact, and nobody linked to us from behind
            if prev_artifact is None and is_fptr:
                # we should be disqualified
                raw_address = self._analyzer.parseAdderss(cur_ea)
                wanted_code_type = self._analyzer.ptrCodeType(raw_address)
                fixed_address = self._analyzer.cleanPtr(raw_address)
                # Be cautious with the removals, we could have duplicates
                if fixed_address in self._ptrs_mappings:
                    self._ptrs_mappings.pop(fixed_address)
                disqualified_addresses.add(raw_address)
                self._analyzer.logger.debug(
                    "Disqualified (scope) pointer 0x%08x from 0x%08x (type %d))",
                    fixed_address, cur_ea, wanted_code_type)
            # the next element starts a new scope
            prev_artifact = None

        # Stage 5: mark the pointers
        for cur_ea, raw_address in approved_ptrs:
            if raw_address in disqualified_addresses:
                continue
            self._ref_ptrs[self._analyzer.cleanPtr(
                raw_address)] = self._analyzer.ptrCodeType(raw_address)
            self._analyzer.markCodePtr(cur_ea, raw_address)

        # print some results
        self._analyzer.logger.info(
            "Found %d different potential function pointer destinations",
            len(self._ref_ptrs))