def get_next_line(line):
    """Return the sark.Line that immediately follows *line* in the listing."""
    following_ea = line.ea + len(line.bytes)
    return sark.Line(following_ea)
import sark
import idaapi
import idautils

anim = sark.structure.get_struct('AnimationFrame')

# While the debugger is running, keep applying the AnimationFrame struct at
# the location referenced through ds:di+2 and follow it in the listing view.
while idaapi.is_debugger_on():
    dataseg = sark.Segment(name='dataseg').ea
    # the word at ds:di+2 holds the segment-relative offset of the frame
    anim_offset = idaapi.get_word(sark.Line(ea=dataseg + idautils.cpu.di + 2).ea)
    anim_addr = dataseg + anim_offset
    # AnimationFrame is 6 bytes long
    idaapi.doStruct(anim_addr, 6, anim)
    # reuse the address computed above instead of re-querying the segment
    idaapi.jumpto(anim_addr)
    idaapi.continue_process()
    idaapi.wait_for_next_event(2, 10000)
 def safe_name(address):
     """Return the name at *address*, or its hex form if it has no real name.

     IDA auto-generates placeholder names starting with 'unk_' for unnamed
     bytes; for those we return the address itself as "0x%x" instead.
     """
     name = sark.Line(address).name
     if name.startswith('unk_'):
         return "0x%x" % address
     return name
Exemple #4
0
            idc.MakeWord(line.ea)

            val = Word(line.ea)
            if val > 31:  #ignore data references to small values (like r0-r31)
                idc.OpOff(line.ea, 0, ram_segment.startEA)

    print "all lines in 0x%x - 0x%x are now words" % (ram_segment.startEA,
                                                      ram_segment.endEA)

    #pointify
    print "looking for off_{} to rename to {}_ptr"
    counter = 0
    for (name_ea, name) in idautils.Names():
        logger.debug("looking for off_ %s @ 0x%x" % (name, name_ea))
        Wait()
        for xref in sark.Line(name_ea).xrefs_to:
            logger.debug("considering xref to %s at 0x%x" % (name, xref.frm))
            original_name = sark.Line(xref.frm).name
            if original_name.startswith("off_"):
                i = 0
                pointer_name = name + "_ptr"
                while sark.Line(name=pointer_name).ea != idc.BADADDR:
                    pointer_name = name + "_ptr%d" % i
                    i += 1

                sark.Line(xref.frm).name = pointer_name
                logger.debug("renamed %s to %s" % (name, pointer_name))
                counter += 1

    print "renamed %d pointers" % counter
Exemple #5
0
def functionScan(analyzer, scs):
    """Scan the code segment and try to define functions.

    Args:
        analyzer (instance): analyzer instance to be used
        scs (list): list of (sark) code segments

    Notes
    -----
        An attempt to declare a function will occur if we found:
        1. Code line after a previous function - and it looks like the beginning of a function of the same code type
        2. Unknown after a previous function - and it looks like the beginning of a function of the estimated code type
    """
    for sc in scs:
        analyzer.logger.info("Function scanning code segment: 0x%x - 0x%x",
                             sc.start_ea, sc.end_ea)
        search_func = False
        just_started = True
        line = sark.Line(sc.start_ea)
        while line.start_ea < sc.end_ea:
            # we don't care about data lines
            if line.is_data:
                line = line.next
                continue
            # check for code lines
            if line.is_code:
                try:
                    # raises SarkNoFunction when the line is outside any
                    # already-defined function
                    sark.Function(line.start_ea)
                    search_func = False
                    just_started = True
                    line = line.next
                    continue
                except sark.exceptions.SarkNoFunction:
                    # the first orphan code line after a function is tolerated;
                    # from the second one onwards we are actively searching
                    if just_started:
                        just_started = False
                    else:
                        search_func = True
            # If we are searching for a function, simply continue
            if search_func or analyzer.switch_identifier.isSwitchCase(
                    line.start_ea):
                line = line.next
                continue
            original_code_type = analyzer.codeType(line.start_ea)
            # If this is code, check that it matches the start of a function, and make it a function
            if line.is_code and analyzer.supportedCodeType(original_code_type) and \
                        analyzer.func_classifier.predictFunctionStartMixed(line.start_ea):
                # add_func returns False on failure; in that case just move on
                if not ida_funcs.add_func(line.start_ea):
                    line = line.next
                else:
                    analyzer.logger.debug("Declared a function at: 0x%x",
                                          line.start_ea)
                continue
            # Code, and doesn't look like a function's start
            if line.is_code:
                # skip for now
                line = line.next
                continue
            # If unknown, check if a function and don't try to keep the same code type
            if line.is_unknown:
                guess_code_type = analyzer.func_classifier.predictFunctionStartType(
                    line.start_ea)
                if analyzer.func_classifier.predictFunctionStart(
                        line.start_ea, guess_code_type):
                    if original_code_type != guess_code_type:
                        analyzer.setCodeType(line.start_ea, line.start_ea + 1,
                                             guess_code_type)
                    if not ida_funcs.add_func(line.start_ea):
                        # creation failed - restore the original code type
                        if original_code_type != guess_code_type:
                            analyzer.setCodeType(line.start_ea,
                                                 line.start_ea + 1,
                                                 original_code_type)
                        line = line.next
                    else:
                        analyzer.logger.debug(
                            "Declared a function at: 0x%x (Type %d, Local type %d)",
                            line.start_ea, guess_code_type, original_code_type)
                    continue
                # otherwise, do nothing
                line = line.next
                continue
Exemple #6
0
def thumbsUp(analyzer, sc, aggressive=False, align=False):
    """Use various metrics in order to locate / fix code type transitions.

    Args:
        analyzer (instance): analyzer instance to be used
        sc (segment): (sark) code segment to work on
        aggressive (bool, optional): True iff should use aggressive heuristics (False by default)
        align (bool, optional): True iff should use align-based heuristics (False by default)

    Notes
    -----
        1. Convert (cancel) a code region that is contained inside the same function, and contains unexplored bytes (not a Chunk, and contains no functions)
        2. Convert (cancel) a code region that is misaligned and contains no functions
        3. Aggressive - Convert (cancel) a code region if the classifier doesn't agree on it's start
        4. Aggressive - Convert (cancel) a code region if it contains illegal code lines / unknowns, and it contains no functions
        5. Aggressive - Convert (cancel) a pointed code region that could be misinterpreted, and that contains no functions (+ delete the fptr)
        6. Aggressive - Convert (cancel) a code region that begins on a function start, that could be misinterpreted, and that contains no functions
        7. Resize a code region that needs a little alignment
        8. In all of the heuristics, if the code region before us was OK and we merged with him, there is no need to check it again.
    """
    # seeded non-zero so the outer fixing loop runs at least once
    regions_fixed = 1
    line = sark.Line(sc.start_ea)
    regions = CodeRegions()
    first_round = True
    is_fptr_pointed = False
    code_aligned = False
    region_start = 0
    metric = None
    # Only continue if we changed something during the current round
    while regions_fixed > 0:
        regions_fixed = 0
        starting_new_region = True  # edge case for the first line in the section
        dummy_mode = False
        prev_code_type = None
        region_converted = False
        region_code_type = None
        if not first_round:
            interesting_regions = regions.changedRegions()
            analyzer.logger.debug("%d interesting regions",
                                  len(interesting_regions))
            # edge case, if we have nothing to do
            if len(interesting_regions) == 0:
                break
            line = sark.Line(interesting_regions[0].start)
            # -1 because the bookkeeping below increments before indexing
            region_offset = -1
        # iterate the current region
        while line.start_ea < sc.end_ea:
            if not starting_new_region:
                # check if we found a transitions
                new_code_type = analyzer.codeType(line.start_ea)
                # no change, just keep on
                if region_code_type == new_code_type:
                    if not dummy_mode:
                        metric.record(line)
                    line = line.next
                    continue
                # we found a transition
                region_end = line.start_ea
                region_converted = False
                if first_round:
                    region = CodeRegion(region_start, region_end,
                                        region_code_type)
                    regions.insert(region)
                # in dummy mode, don't do a thing
                if dummy_mode:
                    metrics = []
                    analyzer.logger.debug(
                        "Dummy region of code type %d in range 0x%x - 0x%x",
                        region_code_type, region_start, region_end)
                # actually do something
                else:
                    # get the metrics
                    metric.stop(region_end)
                    # suffix / align metrics
                    align_metric = metric.alignMetric()
                    metrics = [metric] + ([align_metric]
                                          if align_metric is not None else [])
                    # assume success; the heuristics below clear these flags
                    first_metric_region_fixed = True
                    aligned_region_fixed = True
                # Examine both metrics
                for code_metric in metrics:
                    contains_functions = code_metric.containsFunctions()
                    unknown_count, unknown_ratio = code_metric.unknowns()
                    illegal_count, illegal_ratio = code_metric.illegals()
                    has_unknown_or_illegal = unknown_count > 0 or illegal_count > 0
                    containing_function = code_metric.containingFunction()
                    start_function = code_metric.startFunction()
                    metric_region_start, metric_region_end = code_metric.borders(
                    )
                    metric_region_size = metric_region_end - metric_region_start
                    # special case for the last metric
                    if code_metric == align_metric:
                        aligned_region = True
                        metric_name = "Aligned"
                    else:
                        aligned_region = False
                        metric_name = "Regular"
                    # debug prints
                    analyzer.logger.debug(
                        "%s Metric: Code type %d used in range 0x%x - 0x%x (Pointed: %s, Contains functions: %s)",
                        metric_name, region_code_type,
                        metric_region_start, metric_region_end,
                        str(is_fptr_pointed), contains_functions)
                    if unknown_count > 0:
                        analyzer.logger.debug(
                            "Unknowns %d / %d Overall size = %f%%",
                            unknown_count, metric_region_size,
                            unknown_ratio * 100)
                    if illegal_count > 0:
                        analyzer.logger.debug(
                            "Illegals %d / %d Overall size = %f%%",
                            illegal_count, metric_region_size,
                            illegal_ratio * 100)
                    # Check if we can flip this region
                    # 1. The entire code region is contained inside the same function, and contains unexplored bytes (not a Chunk, and contains no functions)
                    if containing_function is not None and containing_function.start_ea < metric_region_start and metric_region_end <= containing_function.end_ea and\
                       has_unknown_or_illegal and not contains_functions:
                        analyzer.logger.info(
                            "Code region is contained inside a single function - cancel it"
                        )
                        convertRegion(analyzer, metric_region_start,
                                      metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 2. Misaligned region
                    elif not aligned_region and not code_aligned and not contains_functions:
                        analyzer.logger.info(
                            "Misaligned code region without any functions - cancel it"
                        )
                        convertRegion(analyzer, metric_region_start,
                                      metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 3. Aggressive - Classifier doesn't agree about this region's start
                    elif aggressive and\
                         not aligned_region and\
                         analyzer.func_classifier.predictFunctionStartType(metric_region_start) != region_code_type and\
                         analyzer.func_classifier.predictFunctionStartType(metric_region_end)   == new_code_type and\
                         not contains_functions:
                        analyzer.logger.info(
                            "Classifier doesn't agree about the code region's start, and it has no functions - cancel it"
                        )
                        convertRegion(analyzer, metric_region_start,
                                      metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 4. Aggressive - Unknowns and no functions
                    elif aggressive and\
                         has_unknown_or_illegal and not contains_functions:
                        analyzer.logger.info(
                            "Code region contains unexplored bytes, and it has no functions - fixing it"
                        )
                        convertRegion(analyzer, metric_region_start,
                                      metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 5. Aggressive - pointed region that could be misinterpreted + no functions
                    elif aggressive and\
                         not aligned_region and\
                         is_fptr_pointed and\
                         prev_code_type is not None and\
                         ((not analyzer.func_classifier.predictFunctionEnd(metric_region_start, prev_code_type)) or\
                            ((metric_region_size <= analyzer.addressSize()) and not analyzer.func_classifier.predictFunctionEnd(metric_region_end,   region_code_type)) or\
                            ((metric_region_size <= analyzer.addressSize()) and not analyzer.func_classifier.predictFunctionStart(metric_region_end, new_code_type)) or\
                             analyzer.func_classifier.predictFunctionStart(metric_region_start, new_code_type)) and\
                         not contains_functions:
                        analyzer.logger.info(
                            "Code region is fptr pointed, classifier says it's not a function end, and it has no functions - cancel it"
                        )
                        # delete the fptr
                        analyzer.fptr_identifier.deleteFptr(
                            metric_region_start, region_code_type)
                        convertRegion(analyzer, metric_region_start,
                                      metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 6. Aggressive - region on function start, that could be misinterpreted + no functions
                    elif aggressive and\
                         not aligned_region and\
                         start_function is not None and metric_region_start == start_function.start_ea and\
                         analyzer.func_classifier.predictFunctionStart(metric_region_start, new_code_type) and\
                         not contains_functions:
                        analyzer.logger.info(
                            "Code region is a function start, classifier prefers a different code type, and it has no functions - cancel it"
                        )
                        convertRegion(analyzer, metric_region_start,
                                      metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 7. Needs a little alignment
                    elif not aligned_region and not code_aligned:
                        analyzer.logger.debug(
                            "Code region is not aligned, align it down (resize)"
                        )
                        resized_start = analyzer.alignTransitionAddress(
                            metric_region_start, region_code_type)
                        resizeRegion(analyzer, metric_region_start,
                                     metric_region_end, resized_start,
                                     metric_region_end)
                        regions.resizeStart(region, resized_start)
                        regions_fixed += 1
                    # Nothing for now
                    else:
                        if aligned_region:
                            aligned_region_fixed = False
                        else:
                            first_metric_region_fixed = False
                    # Aligned region should start with a function
                    if aligned_region and aligned_region_fixed:
                        ida_funcs.add_func(metric_region_start)
                    # Break the loop and start the new region
                    if first_metric_region_fixed:
                        break
            # if our region was converted, there is no need to scan the current region (partial data)
            dummy_mode = region_converted and first_round
            # new region - check if finished the list
            if not first_round:
                region_offset += 1
                if region_offset >= len(interesting_regions):
                    break
                # check if we need to skip the next one too
                if region_converted and region.next == interesting_regions[
                        region_offset] and interesting_regions[
                            region_offset].code_type == new_code_type:
                    region_offset += 1
                    if region_offset >= len(interesting_regions):
                        break
                region = interesting_regions[region_offset]
                line = sark.Line(region.start)
                region_start = line.start_ea
                if region.prev is not None:
                    prev_code_type = region.prev.code_type
                else:
                    prev_code_type = None
            # the simple case
            else:
                # the code type could have changed, so we re-sample it
                if region_code_type is not None:
                    prev_code_type = analyzer.codeType(region_start)
                region_start = line.start_ea
            # get the current code type (even in dummy mode)
            region_code_type = analyzer.codeType(line.start_ea)
            if not dummy_mode:
                code_aligned = analyzer.isCodeTransitionAligned(region_start)
                starting_new_region = False
                # measure the metrics
                metric = CodeMetric(analyzer,
                                    region_start,
                                    measure_align=align)
                metric.start(line)
                # check if started because of one of our function pointers
                is_fptr_pointed = analyzer.fptr_identifier.isPointedFunction(
                    region_start)
            # advance to the next line
            if first_round:
                line = line.next
        # log the result
        analyzer.logger.info("Fixed %d code regions in this iteration",
                             regions_fixed)
        first_round = False
def get_cur_type(ea, reg):
    """Convert operands at *ea* that dereference *reg* into struct offsets.

    For each operand of the instruction at *ea* whose text contains a memory
    dereference through the user-visible name of *reg* (e.g. "[rbx]"), the
    operand is turned into a struct-offset of the struct named ``type_name``.

    NOTE(review): ``type_name`` is not defined in this function — presumably
    a global set by the caller; confirm before reuse.
    """
    opnds = sark.Line(ea).insn.operands
    # loop-invariant: the register's display name doesn't change per operand
    deref_text = '[%s]' % get_reg_user_name(ea, reg)
    for i, opnd in enumerate(opnds):
        if deref_text in opnd.text:
            str_id = ida_struct.get_struc_id(type_name)
            idc.op_stroff(ea, i, str_id, 0)
Exemple #8
0
def rename_function_by_aString_surrounding_call(aString,
                                                funcName,
                                                xref_func=first_xref,
                                                count_max=10,
                                                filtered_funcs=(),
                                                count_filtered_funcs=0,
                                                head_func=prev_head):
    """Rename the function whose "BL" call sits near a reference to *aString*.

    Starting at the first (per *xref_func*) use of *aString*, walk up to
    *count_max* instructions via *head_func*, looking for a "BL" call.
    Calls whose target name is in *filtered_funcs*, or the first
    *count_filtered_funcs* calls, are skipped; the first remaining call
    target is renamed to *funcName*. Only one of *filtered_funcs* /
    *count_filtered_funcs* may be given.

    Returns:
        bool: True if *funcName* already existed or was assigned,
        False on any failure.
    """
    global ERROR_MINUS_1
    if name_to_addr(funcName) is not None:
        logmsg("%s already defined" % funcName)
        return True

    # the two skipping mechanisms are mutually exclusive
    if filtered_funcs and count_filtered_funcs > 0:
        logmsg("ERROR: Only one argument is supported")
        return False

    # required functions to locate funcName
    for filtered_name in filtered_funcs:
        if name_to_addr(filtered_name) is None:
            logmsg("required function: %s missing, can't locate %s" %
                   (filtered_name, funcName))
            return False

    addr_str = name_to_addr(aString)
    if addr_str is None:
        return False
    addr_str_used = xref_func(addr_str)
    if addr_str_used is None:
        return False
    try:
        # only sanity-check that the string is referenced from inside a function
        sark.Function(ea=addr_str_used)
    except sark.exceptions.SarkNoFunction:
        logmsg("No function at 0x%x when handling %s" %
               (addr_str_used, aString))
        return False

    count = 0
    e = addr_str_used
    bFound = False
    while count <= count_max:
        e = head_func(e)
        line = sark.Line(e)
        try:
            insn = line.insn
        except sark.exceptions.SarkNoInstruction:
            logmsg(
                "data in the middle of instructions at 0x%x, not supported yet"
                % e)
            return False
        if insn.mnem == "BL":
            if len(insn.operands) != 1:
                logmsg("Wrong number of operands for BL at 0x%x" % e)
                return False
            curr_func_name = insn.operands[0].text
            # do we need to skip this "BL" or are we done?
            bFiltered = False
            if count_filtered_funcs > 0:
                logmsg("skipping filtered due to count: %d at 0x%x" %
                       (count_filtered_funcs, e))
                count_filtered_funcs -= 1
                bFiltered = True
            else:
                for filtered_name in filtered_funcs:
                    if curr_func_name == filtered_name:
                        logmsg("skipping filtered name: %s at 0x%x" %
                               (filtered_name, e))
                        bFiltered = True
                        break
            if bFiltered:
                count += 1
                continue
            func_addr = name_to_addr(curr_func_name)
            if func_addr is None:
                return False
            rename_address(func_addr, funcName)
            logmsg("%s = 0x%x" % (funcName, func_addr))
            bFound = True
            break
        count += 1
    if not bFound:
        logmsg("ERROR: %s not found" % funcName)
        return False
    return True
import sark
import idaapi
import idautils

# Structures to stamp over the animation data stream.
frame_struct = sark.structure.get_struct('TroggSpearImage')
terminator_struct = sark.structure.get_struct("EndOfAnimFrame")
dataseg = sark.Segment(name='dataseg').ea
# start at the line currently under the cursor
cursor = sark.Line().ea

finished = False

# Walk the stream: 0xff marks a 2-byte end-of-frame record (a double 0xff
# terminates the whole stream), values below 0x80 mark a 6-byte frame,
# anything else stops the walk.
while not finished:
    marker = idaapi.get_byte(cursor)
    if marker == 0xff:
        print("applying EndOfAnimFrame")
        idaapi.doStruct(cursor, 2, terminator_struct)
        if idaapi.get_byte(cursor + 1) == 0xff:
            finished = True
        cursor += 2
    elif marker < 0x80:
        print("applying AnimationFrame")
        idaapi.doStruct(cursor, 6, frame_struct)
        cursor += 6
    else:
        finished = True
Exemple #10
0
 def clear_all_highlights(self):
     """Reset the color of every line we highlighted, then forget them all."""
     for address in self.lines:
         sark.Line(ea=address).color = None
     self.lines.clear()
Exemple #11
0
def is_ret(ea):
    """Tell whether the instruction at ``ea`` is a RET instruction."""
    instruction = sark.Line(ea).insn
    return instruction.is_ret
Exemple #12
0
def is_call(ea):
    """Tell whether the instruction at ``ea`` is a CALL instruction."""
    instruction = sark.Line(ea).insn
    return instruction.is_call
Exemple #13
0
    mu = Uc(UC_ARCH_ARM, UC_MODE_THUMB)
    mu.mem_map(ADDRESS, 2 * 1024 * 1024)
    mu.mem_write(ADDRESS, code)
    mu.emu_start(ADDRESS | 1, ADDRESS + len(code))
    return mu.reg_read(UC_ARM_REG_R0)


# call sites whose format-string argument could not be resolved to a string
left = []

# manually recovered addresses of the interesting libc-style routines
system_addr = 0x0933C
printf_addr = 0x9174
sprintf_addr = 0x9470

address = sprintf_addr

# For each xref to sprintf: walk 20 lines back from the call site, collect
# the bytes of instructions accepted by detectMovR0Const (presumably
# constant loads into R0 - confirm against its definition), emulate them
# (simulate() returns the resulting R0), and check whether R0 points at a
# string. Unresolved call sites are kept in `left` for manual review.
my_line = sark.Line(address)
for xref in my_line.xrefs_to:
    line = sark.Line(xref.frm)
    code = ''
    for i in range(20):
        if detectMovR0Const(line):
            # prepend so the snippet stays in execution order
            code = line.bytes + code
        line = line.prev
    r0 = simulate(code)
    print '---', r0
    arg = sark.Line(r0)
    if arg.is_string:
        print arg
    else:
        left.append(sark.Line(xref.frm))
Exemple #14
0
def _xrefs_from(function_ea):
    """Get the xrefs leaving *function_ea*.

    Uses the whole function's xrefs when one exists at that address,
    otherwise falls back to the single line's xrefs.
    """
    try:
        source = sark.Function(function_ea)
    except exceptions.SarkNoFunction:
        source = sark.Line(function_ea)
    return source.xrefs_from
Exemple #15
0
import sark
import idaapi
import idautils

anim = sark.structure.get_struct('DrawData')

# While the debugger is running, repeatedly apply the DrawData struct at the
# line addressed through ds:di and jump the listing view there.
while idaapi.is_debugger_on():
    dataseg = sark.Segment(name='dataseg').ea
    # sark.Line(...).ea is already an absolute address (start of the line at
    # ds:di), so no further dataseg adjustment is needed below
    anim_offset = sark.Line(ea=dataseg + idautils.cpu.di).ea
    # DrawData is 0x24 bytes long
    idaapi.doStruct(anim_offset, 0x24, anim)
    idaapi.jumpto(anim_offset)
    idaapi.continue_process()
    idaapi.wait_for_next_event(2, 10000)
Exemple #16
0
    def get_regs_in_operand(self, ea, operand_idx):
        """Return an InsnOpndRegBits describing register usage in one operand.

        Matches the text of operand *operand_idx* of the instruction at *ea*
        against known ARM addressing-mode patterns and records, per register,
        the read/write usage bits plus whether the usage is explicit, part
        of a register range (e.g. {R4-R8,LR}), or implied by a composite
        expansion (via self.reg_expand).
        """
        iorb = InsnOpndRegBits()

        # name -> index tables for the core (R) and double-precision (D) banks
        r_reg2idx = dict()
        r_reg2idx['LR'] = 14
        r_reg2idx['SP'] = 13
        r_reg2idx['PC'] = 15
        for i in range(12 + 1):
            r_reg2idx['R%d' % i] = i

        d_reg2idx = dict()
        for i in range(31 + 1):
            d_reg2idx['D%d' % i] = i

        # inverse tables (index -> name), used when expanding ranges
        r_idx2reg = {}
        for k in r_reg2idx.keys():
            r_idx2reg[r_reg2idx[k]] = k

        d_idx2reg = {}
        for k in d_reg2idx.keys():
            d_idx2reg[d_reg2idx[k]] = k

        opnd = sark.Line(ea).insn.operands[operand_idx]

        # can't be sure that sark got all regs - for example,
        #   'ld16.bu d0, [a12]' doesn't recognise a12
        all_regs = self.get_reg_list()

        # (pattern, usage-bits) pairs; the first matching pattern wins
        operand_res = []
        # R0
        operand_res += [(r'^([^,\[\]]+)$', UsageBits.OP_UK)]
        # LR!
        operand_res += [(r'^([^,\[\]]+)\!$', UsageBits.OP_RW)]
        # [R0]
        operand_res += [(r'^\[([^,\[\]]+)\]$', UsageBits.OP_RD)]
        # [R0,#0x20]
        operand_res += [(r'^\[([^,\[\]]+),.*\]$', UsageBits.OP_RD)]
        # R0,LSR#2
        operand_res += [(r'^([^,\[\]]+),LS[RL]#[0-9]+$', UsageBits.OP_RD)]
        # [r12, 5]!
        operand_res += [(r'\[([^,]+)\,.*\]\!', UsageBits.OP_RW)]
        # [R0],#0x54
        operand_res += [(r'\[([^,]+)\],#.+$', UsageBits.OP_RW)]

        for operand_re, op_bits in operand_res:
            m = re.match(operand_re, opnd.text)
            if m is None:
                continue

            reg_set = set()
            e_reg = m.group(1)
            e_reg = RegName(ea, all_regs).canon(e_reg)

            if e_reg is not None:
                reg_set |= {e_reg}
            # Didn't recognise any such reg. Probably false positive. Try next
            else:
                continue

            for reg in reg_set:
                iorb.set_usage_bits(reg, operand_idx,
                                    op_bits | UsageBits.USAGE_EXPLICIT)

            # registers only implied by composite expansion get a weaker,
            # implicit usage marking
            for reg in (self.reg_expand(ea, reg_set) ^ reg_set):
                iorb.set_usage_bits(
                    reg, operand_idx,
                    op_bits | UsageBits.USAGE_IMPLICIT_COMPOSITE)

            return iorb

        # handle difficult operands
        operand_re = r'^\{([^,-]+([,-][^,-]+)+)\}$'  # {R4-R8,LR}
        op_bits = UsageBits.OP_UK
        m = re.match(operand_re, opnd.text)
        canon_list = self.get_reg_list()
        if m is not None:
            reg_set = set()
            reg_explicit_set = set()
            elem_list = m.group(1)
            for r in elem_list.split(','):
                # Not a range
                if '-' not in r:
                    e_reg = RegName(ea, canon_list).canon(r)
                    if e_reg is not None:
                        reg_set |= {e_reg}
                        reg_explicit_set |= {e_reg}
                # Is a range
                else:
                    s, e = r.split('-')

                    s = RegName(ea, canon_list).canon(s)
                    e = RegName(ea, canon_list).canon(e)

                    if (s is not None) and (e is not None):
                        # expand the range within whichever bank holds both ends
                        if (s in r_reg2idx) and (e in r_reg2idx):
                            reg_explicit_set |= {s}
                            reg_explicit_set |= {e}
                            s_i = r_reg2idx[s]
                            e_i = r_reg2idx[e]
                            for i in range(s_i, e_i + 1):
                                reg_set |= {r_idx2reg[i]}
                        elif (s in d_reg2idx) and (e in d_reg2idx):
                            reg_explicit_set |= {s}
                            reg_explicit_set |= {e}
                            s_i = d_reg2idx[s]
                            e_i = d_reg2idx[e]
                            for i in range(s_i, e_i + 1):
                                reg_set |= {d_idx2reg[i]}
                        else:
                            logger.error('range with unknown - {}'.format(r))

            for reg in reg_set:
                if reg in reg_explicit_set:
                    iorb.set_usage_bits(reg, operand_idx,
                                        op_bits | UsageBits.USAGE_EXPLICIT)
                else:
                    iorb.set_usage_bits(
                        reg, operand_idx,
                        op_bits | UsageBits.USAGE_IMPLICIT_RANGE)

            for reg in (self.reg_expand(ea, reg_set) ^ reg_set):
                iorb.set_usage_bits(
                    reg, operand_idx,
                    op_bits | UsageBits.USAGE_IMPLICIT_COMPOSITE)

            return iorb

        # no pattern matched - return the (empty) usage map
        return iorb
Exemple #17
0
    logger.addHandler(handler)
logger.setLevel(logging.WARNING)

try:
    import idautils
    import idaapi
    import sark

    ram_segment = None
    rom_segment = None
    for segment in sark.segments():
        if segment.name == 'RAM' or segment.name == '.data':
            ram_segment = segment
        elif segment.name == 'ROM' or segment.name == '.text':
            rom_segment = segment

    for (name_ea, name) in idautils.Names():
        if not name.startswith("USART"):
            continue
        logger.debug("looking for xrefs to %s @ 0x%x" % (name, name_ea))
        Wait()
        for xref in sark.Line(name_ea).xrefs_to:
            print "%s <-- 0x%x" % (name, xref.frm)

except:
    exc_type, exc_value, exc_traceback = sys.exc_info()
    logger.error("Uncaught exception",
                 exc_info=(exc_type, exc_value, exc_traceback))

idascript.exit()
        register_group = all_bases.get(key)
        base_name = register_group.attrib['name']
        name_in_module = register_group.attrib['name-in-module']
        base = int(register_group.attrib['offset'], 0)
        for register in root.findall(
                ".//modules/module/register-group[@name='%s']/register" %
                name_in_module):
            name = register.attrib['name']
            offset = int(register.attrib['offset'], 0)
            caption = register.attrib['caption']

            ioport_name = "%s_%s" % (base_name, name)

            #the ATxmega128a4u doens't have a register file mapped at 0x0 in RAM which is being forced by the avr IDA module; correct the location of all the mapped IO ports

            wrong_line = sark.Line(idaapi.get_name_ea(0, ioport_name))
            right_line = sark.Line(wrong_line.ea - 0x20)
            wrong_line.name = ""
            wrong_line.comments.repeat = ""

            offset = right_line.ea - r0_address

            #actually renaming the data locations in the first 0x20 will make IDA disassembly look bonkers because it uses the names of those data locations for its register names
            #just append a comment about the ioport location in question to the first 0x20
            if offset <= 0x20:
                previous_comment = right_line.comments.repeat
                ioport_comment = "io:%s" % ioport_name
                if previous_comment is None:
                    right_line.comments.repeat = ioport_comment
                else:
                    right_line.comments.repeat = previous_comment + " " + ioport_comment
if not logger.handlers:
    handler = logging.StreamHandler(stream=sys.stdout)
    logger.addHandler(handler)

try:
    import idautils
    import idaapi
    import sark

    # AVR pointer-register halves and their conventional X/Y/Z aliases
    data_vectors = {
        'r27': 'XH',
        'r26': 'XL',
        'r29': 'YH',
        'r28': 'YL',
        'r31': 'ZH',
        'r30': 'ZL'
    }

    # rename each register location to its alias and record the original
    # register name as a repeatable comment
    for vector_register, alias in data_vectors.items():
        register_line = sark.Line(idaapi.get_name_ea(0, vector_register))

        register_line.name = alias
        register_line.comments.repeat = "alias:%s" % vector_register

# Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt still
# propagate; anything else is logged with its full traceback.
except Exception:
    exc_type, exc_value, exc_traceback = sys.exc_info()
    logger.error("Uncaught exception",
                 exc_info=(exc_type, exc_value, exc_traceback))

idascript.exit()
Exemple #20
0
def dataScan(analyzer, scs):
    """Scan the code segments for orphan data blobs that represent analysis errors.

    Two passes are made over every code segment:

    1. Data chunks *inside* functions that are only reachable by fall-through
       from the previous line are deleted and re-created as code instructions.
    2. Data chunks *outside* functions that are large enough for the
       classifier and look like a function start are collected, then converted
       into new functions - possibly after switching their code type.

    Args:
        analyzer (instance): analyzer instance to be used
        scs (list): list of (sark) code segments
    """
    # First Scan - unreffed data chunks inside functions ==> should be converted to code
    # Per-segment state machine: first_line marks the start of the current
    # candidate chunk, end_line is the first line that terminates it.
    for sc in scs:
        first_line = None
        end_line = None
        for line in sc.lines:
            # After the first, the rest of the lines should have 0 crefs
            if first_line is not None and ((not line.is_data)
                                           or len(list(line.drefs_to)) > 0
                                           or len(list(line.crefs_to)) > 0):
                end_line = line
            # we only care about data lines with a single cref from the previous line
            # (plain fall-through; anything referenced from afar is real data)
            elif first_line is None and (
                (not line.is_data) or len(list(line.drefs_to)) > 0
                    or len(list(line.crefs_to)) != 1
                    or sark.Line(list(line.crefs_to)[0]).next != line):
                end_line = line
            # don't mark switch entries
            elif analyzer.switch_identifier.isSwitchEntry(line.start_ea):
                end_line = line
            # Finally, check if it could be a function of some type
            elif first_line is None:
                first_line = line
                continue
            # Found an adjacent suitable line
            else:
                continue
            # Now check if we found something (end_line is always != None at this point)
            if first_line is not None and end_line is not None:
                chunk_start = first_line.start_ea
                chunk_end = end_line.start_ea
                # check that we can deduce anything on this current code type
                if not analyzer.supportedCodeType(
                        analyzer.codeType(chunk_start)):
                    continue
                # check that the chunk before us is not the end of a function
                if analyzer.func_classifier.predictFunctionEnd(chunk_start):
                    # shouldn't really happen, do nothing in this case
                    pass
                # data chunk in the middle of a function, and not at its end - convert it to code
                else:
                    analyzer.logger.debug(
                        "In-Function data chunk at: 0x%x - 0x%x (%d)",
                        chunk_start, chunk_end, chunk_end - chunk_start)
                    ida_bytes.del_items(chunk_start, 0,
                                        chunk_end - chunk_start)
                    idc.create_insn(chunk_start)
                # reset the vars
                first_line = None
                end_line = None

    # Second scan - unreffed data chunks outside of functions ==> new functions, possibly of different code type
    # Chunks smaller than the classifier's minimal sample can't be judged.
    size_limit = analyzer.func_classifier.functionStartSize()
    analyzer.logger.debug("Size limit for data scan is: %d", size_limit)
    conversion_candidates = []
    # recon pass - only collect candidates here; the conversion happens in a
    # separate pass below so newly created code doesn't perturb the scan
    for sc in scs:
        first_line = None
        end_line = None
        for line in sc.lines:
            # we only care about data lines without xrefs
            if (not line.is_data) or len(list(line.crefs_to)) > 0 or len(
                    list(line.drefs_to)) > 0:
                end_line = line
            # check if it's big enough for the classifier
            elif line.size < size_limit:
                end_line = line
            # check if it looks like a string
            elif analyzer.str_identifier.isLocalAsciiString(line.start_ea,
                                                            check_refs=False):
                analyzer.str_identifier.defineAsciiString(line.start_ea)
                end_line = line
            # make sure it isn't a switch entry
            elif analyzer.switch_identifier.isSwitchEntry(line.start_ea):
                end_line = line
            # Finally, check if it could be a function of some type
            elif first_line is None:
                first_line = line
                continue
            # Found an adjacent suitable line
            else:
                continue
            # Now check if we found something (end_line is always != None at this point)
            if first_line is not None and end_line is not None:
                chunk_start = first_line.start_ea
                chunk_end = end_line.start_ea
                guess_code_type = analyzer.func_classifier.predictFunctionStartType(
                    chunk_start)
                original_code_type = analyzer.codeType(chunk_start)
                analyzer.logger.debug(
                    "Found a data chunk at: 0x%x - 0x%x (%d), (Type %d, Local type %d)",
                    chunk_start, chunk_end, chunk_end - chunk_start,
                    guess_code_type, original_code_type)
                # Check if this is the beginning of a function
                if analyzer.func_classifier.predictFunctionStart(
                        chunk_start, guess_code_type):
                    conversion_candidates.append(
                        (chunk_start, chunk_end, guess_code_type,
                         original_code_type))
                # reset the vars
                first_line = None
                end_line = None
    # conversion pass
    for chunk_start, chunk_end, guess_code_type, original_code_type in conversion_candidates:
        analyzer.logger.info(
            "Found an isolated data chunk at: 0x%x - 0x%x (%d), (Type %d, Local type %d)",
            chunk_start, chunk_end, chunk_end - chunk_start, guess_code_type,
            original_code_type)
        ida_bytes.del_items(chunk_start, 0, chunk_end - chunk_start)
        if original_code_type != guess_code_type:
            analyzer.setCodeType(chunk_start, chunk_end, guess_code_type)
        idc.plan_and_wait(chunk_start, chunk_end)
        ida_funcs.add_func(chunk_start)
Exemple #21
0
    def locateDataPtrs(self, scs, sds):
        """Locate all data / code fptrs in the given set of segments.

        Args:
            scs (list): list of (sark) code segments
            sds (list): list of (sark) data segments

        The scan runs in several phases:

        1. Linear sweep over the data segments, collecting function-pointer
           candidates (approved when they target a known / predicted function
           start) and plain data pointers.
        2. A sliding-window heuristic that approves extra candidates that fit
           the spacing pattern of already-approved pointers.
        3. A filtering pass that disqualifies approved pointers whose target
           can't actually be decoded as code of the expected type.
        4. A locality filter that drops isolated fptrs that are far away from
           every other marked artifact.

        Fix: ``wanted_code_type`` is now initialized before the
        disqualification branches. Previously the "several code types" branch
        reached the debug log with the name unbound, raising NameError on the
        first such disqualified pointer (or logging a stale value).
        """
        local_ref_ptrs = defaultdict(set)  # target ea -> observed code types
        seen_list = []                     # (ea, is_approved) in scan order
        approved_ptrs = []                 # (ea, raw pointer value)
        approved_eas = set()               # eas that hold an approved fptr
        ptrs_mappings = defaultdict(set)   # target ea -> referring eas
        marked_artifacts = []              # (ea, is_fptr) for locality filter
        for sd in sds:
            cur_ea = pad(sd.start_ea, self._analyzer.data_fptr_alignment)
            while cur_ea < sd.end_ea:
                line = sark.Line(cur_ea)
                # strings can't hold pointers - skip them whole
                if line.is_string:
                    cur_ea += pad(line.size,
                                  self._analyzer.data_fptr_alignment)
                    continue
                # check for a function ptr
                value = self._analyzer.parseAdderss(cur_ea)
                # make sure it is valid
                if self.isValidCodePtr(value, scs):
                    func_value = self._analyzer.cleanPtr(value)
                    code_type = self._analyzer.ptrCodeType(value)
                    # is seen - target already known as a pointed-to function
                    if func_value in local_ref_ptrs:
                        local_ref_ptrs[func_value].add(code_type)
                        ptrs_mappings[func_value].add(cur_ea)
                        self._analyzer.logger.debug(
                            "Located a fptr from 0x%x to 0x%x (type: %d) - Undeclared function",
                            cur_ea, func_value, code_type)
                        if self.isPrintableAddress(value):
                            self._analyzer.logger.debug(
                                "Looks like a printable FP: 0x%x", value)
                        approved_ptrs.append((cur_ea, value))
                        approved_eas.add(cur_ea)
                        seen_list.append((cur_ea, True))
                        marked_artifacts.append((cur_ea, True))
                    # is start of real function, from the correct type
                    elif self._analyzer.codeType(
                            func_value
                    ) == code_type and self._analyzer.func_classifier.isFuncStart(
                            func_value):
                        local_ref_ptrs[func_value].add(code_type)
                        ptrs_mappings[func_value].add(cur_ea)
                        self._analyzer.logger.debug(
                            "Located a fptr from 0x%x to 0x%x (type: %d) - Existing function",
                            cur_ea, func_value, code_type)
                        approved_ptrs.append((cur_ea, value))
                        approved_eas.add(cur_ea)
                        seen_list.append((cur_ea, True))
                        marked_artifacts.append((cur_ea, True))
                    # is start of function (per the classifier's prediction)
                    elif self._analyzer.func_classifier.predictFunctionStartMixed(
                            func_value, known_type=code_type):
                        local_ref_ptrs[func_value].add(code_type)
                        ptrs_mappings[func_value].add(cur_ea)
                        self._analyzer.logger.debug(
                            "Located a fptr from 0x%x to 0x%x (type: %d) - Undeclared function",
                            cur_ea, func_value, code_type)
                        if self.isPrintableAddress(value):
                            self._analyzer.logger.debug(
                                "Looks like a printable FP: 0x%x", value)
                        approved_ptrs.append((cur_ea, value))
                        approved_eas.add(cur_ea)
                        seen_list.append((cur_ea, True))
                        marked_artifacts.append((cur_ea, True))
                    # only a candidate - may be will be approved later
                    else:
                        seen_list.append((cur_ea, False))
                        # check for an analysis problem - drop stray drefs
                        if len(list(line.drefs_from)) > 0:
                            idc.del_dref(cur_ea, value)
                            idc.del_dref(cur_ea, func_value)
                # Check for a valid data pointer
                elif self.isValidDataPtr(value, sds):
                    # make it a data pointer
                    self._analyzer.markDataPtr(cur_ea, value)
                    self._analyzer.logger.debug(
                        "Located a data ptr from 0x%x to 0x%x", cur_ea, value)
                    marked_artifacts.append((cur_ea, False))
                    marked_artifacts.append((value, False))
                # continue forward
                cur_ea += pad(self._analyzer.addressSize(),
                              self._analyzer.data_fptr_alignment)

        # check if there is some pattern we can use to find more fptrs
        chosen_threshold = 7
        cur_window = []
        window_index = 0
        # NOTE: this step is too risky if there are Read-Only data constants inside the text section
        while window_index < len(
                seen_list) and not self._analyzer.isCodeMixedWithData():
            # If we didn't reach the end, and
            # 1. The window doesn't have enough "True" pointers
            # 2. The windows contains only "True" pointers
            # Slide the window onward
            while window_index < len(seen_list) and (
                    len(list(filter(lambda x: x[1],
                                    cur_window))) < chosen_threshold
                    or len(list(filter(lambda x: not x[1], cur_window))) == 0):
                # If we are above the threshold (meaning that cond #2 applies), kick out the first ptr (which is a "True" ptr)
                if chosen_threshold < len(
                        list(filter(lambda x: x[1], cur_window))):
                    cur_window = cur_window[1:]
                # Add a new pointer at the end of our window
                cur_window.append(seen_list[window_index])
                window_index += 1
            # Sanity check: check if we have a candidate
            if window_index == len(seen_list) and len(
                    list(filter(lambda x: not x[1], cur_window))) == 0:
                break
            # measure the deltas
            chosen_window = list(filter(lambda x: x[1], cur_window))
            # deltas between the "True" pointers
            chosen_deltas = set()
            for i in range(len(chosen_window) - 1):
                chosen_deltas.add(chosen_window[i + 1][0] -
                                  chosen_window[i][0])
            # All possible deltas between adjacent pointers
            seen_deltas = set()
            for i in range(len(cur_window) - 1):
                seen_deltas.add(cur_window[i + 1][0] - cur_window[i][0])
            new_chosen = None
            # check for a pattern - candidates share spacing with approved ptrs
            if len(seen_deltas) <= len(chosen_deltas):
                new_chosen = list(filter(lambda x: not x[1], cur_window))[0]
            # check if the window starts with a candidate, that is right near a "True" pointer
            elif not cur_window[0][1]:
                first_seen = cur_window[0]
                seen_addr = first_seen[0]
                for candidate in [
                        seen_addr - self._analyzer.data_fptr_alignment,
                        seen_addr + self._analyzer.data_fptr_alignment
                ]:
                    if candidate in approved_eas:
                        new_chosen = first_seen
                        break
            # check if found a match
            if new_chosen is not None:
                # re-insert ourselves with our new values
                our_index = cur_window.index(new_chosen)
                cur_window = cur_window[:our_index] + [
                    (new_chosen[0], True)
                ] + cur_window[our_index + 1:]
                # mark the pointer
                cur_ea = new_chosen[0]
                value = self._analyzer.parseAdderss(cur_ea)
                func_value = self._analyzer.cleanPtr(value)
                code_type = self._analyzer.ptrCodeType(value)
                local_ref_ptrs[func_value].add(code_type)
                ptrs_mappings[func_value].add(cur_ea)
                approved_ptrs.append((cur_ea, value))
                marked_artifacts.append((cur_ea, True))
                approved_eas.add(cur_ea)
                self._analyzer.logger.debug(
                    "Located new fptr from 0x%x to 0x%x (type: %d)", cur_ea,
                    func_value, code_type)
            # advance the window
            cur_window = cur_window[1:]

        # filter the pointers (we could have false positives)
        disqualified_addresses = set()
        for cur_ea, raw_address in approved_ptrs:
            fixed_address = self._analyzer.cleanPtr(raw_address)
            disqualified = False
            # Initialized up front: the multi-type branch below disqualifies
            # without selecting a single wanted code type, yet the value is
            # still used by the log line at the bottom of this loop.
            wanted_code_type = None
            # check if already disqualified
            if fixed_address not in ptrs_mappings:
                continue
            # Several code types for the same address, we take no chances and remove them all
            if len(local_ref_ptrs[fixed_address]) != 1:
                disqualified = True
            # Check if the code type is even legal for that address
            else:
                wanted_code_type = list(local_ref_ptrs[fixed_address])[0]
                orig_code_type = self._analyzer.codeType(fixed_address)
                idc.ida_bytes.del_items(fixed_address, 0,
                                        self._analyzer.addressSize())
                if orig_code_type != wanted_code_type:
                    self._analyzer.setCodeType(fixed_address,
                                               fixed_address + 4,
                                               wanted_code_type)
                # if the target can't be decoded as an instruction, it isn't code
                if idc.create_insn(fixed_address) == 0:
                    disqualified = True
                # Always clean after ourselves
                ida_bytes.del_items(fixed_address, 0,
                                    self._analyzer.addressSize())
                if orig_code_type != wanted_code_type:
                    self._analyzer.setCodeType(
                        fixed_address,
                        fixed_address + self._analyzer.addressSize(),
                        orig_code_type)
            # We are OK, can continue
            if not disqualified:
                continue
            # Found a false function pointer
            # Be cautious with the removals, we could have duplicates
            # NOTE(review): this pops from self._ptrs_mappings while the scan
            # above filled the local ptrs_mappings - confirm the class-level
            # mapping is the intended one here.
            if fixed_address in self._ptrs_mappings:
                self._ptrs_mappings.pop(fixed_address)
            disqualified_addresses.add(raw_address)
            marked_artifacts.remove((cur_ea, True))
            # no need to remove from local_ref_ptrs, as the global variable only gets the approved values
            # no need to remove from approved_eas, as this data set isn't used anymore
            # %s (not %d) - wanted_code_type is None on the multi-type path
            self._analyzer.logger.debug(
                "Disqualified (code) pointer 0x%08x from 0x%08x (type %s, seen types %s)",
                fixed_address, cur_ea, wanted_code_type,
                local_ref_ptrs[fixed_address])

        # Now filter them based on scoped range from other artifacts
        marked_artifacts.sort(key=lambda x: x[0])
        cur_index = 0
        prev_artifact = None
        while cur_index < len(marked_artifacts) - 1:
            cur_ea, is_fptr = marked_artifacts[cur_index]
            next_ea, _ = marked_artifacts[cur_index + 1]
            # Only check ourselves against the next in line
            if cur_ea + FPTR_LOCALITY_RANGE < next_ea:
                if prev_artifact is None and is_fptr:
                    # we should be disqualified - isolated from all artifacts
                    raw_address = self._analyzer.parseAdderss(cur_ea)
                    wanted_code_type = self._analyzer.ptrCodeType(raw_address)
                    fixed_address = self._analyzer.cleanPtr(raw_address)
                    # Be cautious with the removals, we could have duplicates
                    if fixed_address in self._ptrs_mappings:
                        self._ptrs_mappings.pop(fixed_address)
                    disqualified_addresses.add(raw_address)
                    self._analyzer.logger.debug(
                        "Disqualified (scope) pointer 0x%08x from 0x%08x (type %d))",
                        fixed_address, cur_ea, wanted_code_type)
                # reset the prev artifact - the chain of nearby artifacts broke
                prev_artifact = None
                # check the next element
                cur_index += 1
            # We are linking to the next element, so he is legit too
            else:
                prev_artifact = next_ea
                cur_index += 1

        # mark the pointers that survived all of the filters
        for cur_ea, raw_address in filter(
                lambda x: x[1] not in disqualified_addresses, approved_ptrs):
            self._ref_ptrs[self._analyzer.cleanPtr(
                raw_address)] = self._analyzer.ptrCodeType(raw_address)
            self._analyzer.markCodePtr(cur_ea, raw_address)

        # print some results
        self._analyzer.logger.info(
            "Found %d different potential function pointer destinations",
            len(self._ref_ptrs))
    def locateLocalConstants(self, scs, sds):
        """Locate and define all of the local strings / numeric constants, that match our observed pattern.

        Args:
            scs (list): List of (sark) code segments.
            sds (list): List of (sark) data segments.
        """
        # NOTE: this variant uses the legacy (pre-IDA 7 / Python 2) APIs:
        # startEA/endEA, idc.MakeWord/MakeDword/MakeQword/MakeData, xrange.
        self._analyzer.logger.info(
            "Locating local strings / constants in the code sections")
        for sc in scs:
            # walk the segment in alignment-sized steps
            cur_ea = pad(sc.startEA, self._local_alignment)
            while cur_ea < sc.endEA:
                # check for a data constant
                if self.isDataConstant(cur_ea):
                    # check for a string (refs already checked)
                    if self._analyzer.str_identifier.isLocalAsciiString(
                            cur_ea, check_refs=False):
                        length = self._analyzer.str_identifier.defineAsciiString(
                            cur_ea)
                        # re-align after the string; mark leftover bytes as data
                        padded_length = pad(length, self._local_alignment)
                        if padded_length != length:
                            idc.MakeUnknown(cur_ea + length,
                                            padded_length - length, 0)
                            idc.MakeData(cur_ea + length, 0,
                                         padded_length - length, 0)
                        cur_ea += padded_length
                    # This means it is a constant
                    else:
                        # no known pad byte: define the whole aligned slot
                        if self._local_pad is None:
                            idc.MakeData(cur_ea, 0, self._local_alignment, 0)
                        else:
                            # check the size of the constant using the byte padding
                            # (scan backwards; `offset` lands on the last non-pad byte)
                            for offset in xrange(self._local_alignment - 1, -1,
                                                 -1):
                                if idc.Byte(cur_ea +
                                            offset) != self._local_pad:
                                    break
                            # prepare the bytes
                            idc.MakeUnknown(cur_ea, self._local_alignment, 0)
                            # the data constant - try to make it pretty
                            if offset + 1 == 2:
                                idc.MakeWord(cur_ea)
                            elif offset + 1 == 4:
                                idc.MakeDword(cur_ea)
                            elif offset + 1 == 8:
                                idc.MakeQword(cur_ea)
                            else:
                                idc.MakeData(cur_ea, 0, offset + 1, 0)
                            # the padding
                            # NOTE(review): precedence makes this size
                            # `(alignment - offset) + 1`, while the remaining
                            # pad is `alignment - (offset + 1)` bytes - the
                            # sibling version repeats the same expression, so
                            # confirm the intended size before changing it.
                            idc.MakeData(cur_ea + offset + 1, 0,
                                         self._local_alignment - offset + 1, 0)
                            # Now check for a pointer (only supports code pointers for now)
                            if offset + 1 == self._analyzer.addressSize():
                                value = self._analyzer.parseAdderss(cur_ea)
                                # only support pointers inside our local segment (more probable)
                                if sc.startEA <= value and value < sc.endEA:
                                    self._analyzer.markCodePtr(
                                        cur_ea, value, aggressive=False)
                                # try a pointer to a declared string
                                else:
                                    for sd in sds:
                                        if sd.startEA <= value and value <= sd.endEA:
                                            line = sark.Line(value)
                                            if line.is_string and line.startEA == value:
                                                self._analyzer.markDataPtr(
                                                    cur_ea,
                                                    value,
                                                    aggressive=False)
                                            break
                        # now move onward
                        cur_ea += self._local_alignment
                # found nothing, move on
                else:
                    cur_ea += self._local_alignment
    8: 'kn3.ob',
    12: 'kn4.ob',
    16: 'kn4.ob',
}

# Raw collision byte from the animation frame -> collision-type label.
# NOTE(review): labels inferred from the values only; confirm the semantics
# against the game engine's own documentation.
collide_type = {
    0: 'NonSolid',
    1: 'Collidee',
    2: 'Collider',
    16: 'Vm',
    128: 'Blood',
    144: 'BloodStain',
}


line = sark.Line()
next_line = True
while next_line:
    if line.disasm.startswith("AnimationFrame"):

        sprite, img_num, y, collide, x = unpack('<4BH', line.bytes)
        y = byte_to_sign(y)
        x = hex_to_sign(x)
        sprite = spritesheets[sprite]
        collide = collide_type[collide]

        #test = "ImagePosition('{}', {}, {}, {}, {}),".format(sprite, img_num, y, x, collide)
        test = "  - {{sheet: {}, image: {}, y: {}, x: {}, image_type:{}}}".format(sprite, img_num, y, x, collide)
        print test
        line = line.next
    elif line.disasm.startswith('EndOfAnimFrame <0FFh, 0>'):
Exemple #24
0
    def analyzeFunction(self, func_ea, src_mode):
        """Analyze a given function, and creates a canonical representation for it.

        Collects, per instruction: numeric constants, referenced strings,
        call candidates (data fptrs and code refs), and - in source mode - a
        running MD5 hash of the function's content. Basic-block sizes and the
        call order are recorded at the end.

        Args:
            func_ea (int): effective address of the wanted function
            src_mode (bool): True iff analyzing a self-compiled source file, otherwise analyzing a binary function

        Return Value:
            FunctionContext object representing the analyzed function
        """
        func = sark.Function(func_ea)
        if src_mode:
            context = sourceContext()(self.funcNameInner(
                func.name), 0)  # Index is irrelevant for the source analysis
        else:
            context = binaryContext()(func_ea, self.funcNameInner(
                func.name), 0)  # The index will be adjusted later, manually

        func_start = func.start_ea
        instr_count = 0
        call_candidates = set()
        code_hash = md5()
        for line in func.lines:
            instr_count += 1
            # Numeric Constants
            # (immediates that are also data refs are handled as refs below)
            data_refs = list(line.drefs_from)
            for oper in [x for x in line.insn.operands if x.type.is_imm]:
                if oper.imm not in data_refs:
                    context.recordConst(oper.imm)
            # Data Refs (strings, fptrs)
            for ref in data_refs:
                # Check for a string (finds un-analyzed strings too)
                str_const = self.disas.stringAt(ref)
                if str_const is not None and len(str_const) >= MIN_STR_SIZE:
                    context.recordString(str_const)
                    continue
                # Check for an fptr
                called_func = self.disas.funcAt(ref)
                if called_func is not None:
                    call_candidates.add(self.disas.funcStart(called_func))
                elif src_mode:
                    # in source mode keep the raw ref - may resolve at link time
                    call_candidates.add(ref)
                    continue
            # Code Refs (calls and unknowns)
            for cref in line.crefs_from:
                called_func = self.disas.funcAt(cref)
                if called_func is None:
                    continue
                called_func_start = self.disas.funcStart(called_func)
                # record direct recursion (a call back to our own start) and
                # any ref that leaves the current function
                if (cref == func_start and
                        line.insn.is_call) or called_func_start != func_start:
                    call_candidates.add(called_func_start)
            # in binary mode don't let the call_candidates expand too much
            if not src_mode:
                [context.recordCall(x) for x in call_candidates]
                call_candidates = set()
            # hash the instruction (only in source mode)
            else:
                # two cases:
                # 1. No linker fixups, hash the binary - easy case
                # 2. Linker fixups, hash the text (includes the symbol name that the linker will use too)
                has_fixups = False
                # data variables
                for dref in line.drefs_from:
                    if sark.Line(dref).name in self.disas.exports():
                        has_fixups = True
                        break
                # external code functions
                if not has_fixups:
                    for cref in line.crefs_from:
                        if sark.Line(cref).name in self.disas.exports():
                            has_fixups = True
                            break
                # case #2
                if has_fixups:
                    code_hash.update(line.disasm.encode("utf-8"))
                # case #1
                else:
                    code_hash.update(line.bytes)

        # check all the call candidates together
        if src_mode:
            for candidate in call_candidates:
                ref_func = None
                called_func = self.disas.funcAt(candidate)
                if called_func is not None:
                    ref_func = self.disas.funcName(called_func)
                    risky = False
                else:
                    # not a declared function - fall back to the raw name
                    ref_func = self.disas.nameAt(candidate)
                    risky = True
                # check if known or unknown
                # (extern/import lines mean the symbol lives outside this file)
                if sark.Line(candidate).disasm.split(" ")[0].lower() in (
                        "extrn", "extern", "import"):
                    context.recordUnknown(ref_func, is_fptr=risky)
                elif not risky:
                    context.recordCall(ref_func)
            # set the function's hash
            context.setHash(code_hash.hexdigest())

        context.setFrame(func.frame_size)
        context.setInstrCount(instr_count)

        # Now, record the code blocks
        flow = idaapi.FlowChart(func.func_t)
        for block in flow:
            try:
                context.recordBlock(
                    len(list(sark.CodeBlock(block.start_ea).lines)))
            except Exception:
                # happens with code outside of a function
                continue
        context.blocks.sort(reverse=True)

        # Now add the flow analysis
        context.setCallOrder(self.disas.analyzeFunctionGraph(
            func_ea, src_mode))

        return context
Exemple #25
0
    def locateLocalConstants(self, scs, sds):
        """Locate and define all of the local strings / numeric constants, that match our observed pattern.

        Args:
            scs (list): List of (sark) code segments.
            sds (list): List of (sark) data segments.
        """
        # NOTE: this variant uses the modern (IDA 7+) APIs:
        # start_ea/end_ea, ida_bytes.create_data/del_items, range.
        self._analyzer.logger.info(
            "Locating local strings / constants in the code sections")
        for sc in scs:
            # walk the segment in alignment-sized steps
            cur_ea = pad(sc.start_ea, self._local_alignment)
            while cur_ea < sc.end_ea:
                # Only interested in data constants
                if not self.isDataConstant(cur_ea):
                    cur_ea += self._local_alignment
                    continue
                # check for a string (refs already checked)
                if self._analyzer.str_identifier.isLocalAsciiString(
                        cur_ea, check_refs=False):
                    length = self._analyzer.str_identifier.defineAsciiString(
                        cur_ea)
                    # re-align after the string; mark leftover bytes as data
                    padded_length = pad(length, self._local_alignment)
                    if padded_length != length:
                        ida_bytes.del_items(cur_ea + length, 0,
                                            padded_length - length)
                        ida_bytes.create_data(cur_ea + length, 0,
                                              padded_length - length, 0)
                    cur_ea += padded_length
                    continue
                # This means it is a constant, now check if we have a padding
                if self._local_pad is None:
                    ida_bytes.create_data(cur_ea, 0, self._local_alignment, 0)
                    cur_ea += self._local_alignment
                    continue
                # check the size of the constant using the byte padding
                # (scan backwards; `offset` lands on the last non-pad byte)
                for offset in range(self._local_alignment - 1, -1, -1):
                    if idc.get_wide_byte(cur_ea + offset) != self._local_pad:
                        break
                # prepare the bytes
                ida_bytes.del_items(cur_ea, 0, self._local_alignment)
                # the data constant - try to make it pretty
                if offset + 1 == 2:
                    ida_bytes.create_data(cur_ea, idc.FF_WORD, 2, idc.BADADDR)
                elif offset + 1 == 4:
                    ida_bytes.create_data(cur_ea, idc.FF_DWORD, 4, idc.BADADDR)
                elif offset + 1 == 8:
                    ida_bytes.create_data(cur_ea, idc.FF_QWORD, 8, idc.BADADDR)
                else:
                    ida_bytes.create_data(cur_ea, 0, offset + 1, 0)
                # the padding
                # NOTE(review): precedence makes this size
                # `(alignment - offset) + 1`, while the remaining pad is
                # `alignment - (offset + 1)` bytes - the legacy version
                # repeats the same expression, so confirm intent.
                ida_bytes.create_data(cur_ea + offset + 1, 0,
                                      self._local_alignment - offset + 1, 0)
                # Now check for a pointer (only supports code pointers for now)
                if offset + 1 == self._analyzer.addressSize():
                    value = self._analyzer.parseAdderss(cur_ea)
                    # only support pointers inside our local segment (more probable)
                    if sc.start_ea <= value < sc.end_ea:
                        self._analyzer.markCodePtr(cur_ea,
                                                   value,
                                                   aggressive=False)
                    # try a pointer to a declared string
                    else:
                        for sd in sds:
                            if sd.start_ea <= value <= sd.end_ea:
                                line = sark.Line(value)
                                if line.is_string and line.start_ea == value:
                                    self._analyzer.markDataPtr(
                                        cur_ea, value, aggressive=False)
                                break
                # now move onward
                cur_ea += self._local_alignment