def get_next_line(line):
    return sark.Line(line.ea + len(line.bytes))
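# Usage sketch (assumes an open IDB with sark loaded): walk forward a few
# lines from the cursor. Note that sark.Line already exposes `.next`; this
# helper just makes the address arithmetic explicit.
line = sark.Line(idc.here())
for _ in range(5):
    print("0x%x: %s" % (line.ea, line.disasm))
    line = get_next_line(line)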
import sark
import idaapi
import idautils

anim = sark.structure.get_struct('AnimationFrame')

# While the debugger is running, apply the AnimationFrame struct at the
# offset pointed to by DI (plus 2) inside 'dataseg', then resume execution.
while idaapi.is_debugger_on():
    dataseg = sark.Segment(name='dataseg').ea
    anim_offset = idaapi.get_word(sark.Line(ea=dataseg + idautils.cpu.di + 2).ea)
    anim_addr = dataseg + anim_offset
    idaapi.doStruct(anim_addr, 6, anim)
    idaapi.jumpto(sark.Segment(name='dataseg').ea + anim_offset)
    idaapi.continue_process()
    idaapi.wait_for_next_event(2, 10000)
def safe_name(address):
    """Return the name at `address`, or its hex form if IDA auto-named it."""
    name = sark.Line(address).name
    if name.startswith('unk_'):
        return "0x%x" % address
    return name
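# Example (sketch): dump the callers of the line under the cursor, using
# safe_name() so auto-generated 'unk_' labels print as raw addresses.
for xref in sark.Line(idc.here()).xrefs_to:
    print("referenced from %s" % safe_name(xref.frm))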
# (fragment: `line` comes from an enclosing loop over the RAM segment's lines)
idc.MakeWord(line.ea)
val = Word(line.ea)
if val > 31:  # ignore data references to small values (like r0-r31)
    idc.OpOff(line.ea, 0, ram_segment.startEA)

print "all lines in 0x%x - 0x%x are now words" % (ram_segment.startEA, ram_segment.endEA)

# pointify
print "looking for off_* names to rename to *_ptr"
counter = 0
for (name_ea, name) in idautils.Names():
    logger.debug("looking for off_ %s @ 0x%x" % (name, name_ea))
    Wait()
    for xref in sark.Line(name_ea).xrefs_to:
        logger.debug("considering xref to %s at 0x%x" % (name, xref.frm))
        original_name = sark.Line(xref.frm).name
        if original_name.startswith("off_"):
            i = 0
            pointer_name = name + "_ptr"
            while sark.Line(name=pointer_name).ea != idc.BADADDR:
                pointer_name = name + "_ptr%d" % i
                i += 1
            sark.Line(xref.frm).name = pointer_name
            logger.debug("renamed %s to %s" % (original_name, pointer_name))
            counter += 1

print "renamed %d pointers" % counter
def functionScan(analyzer, scs):
    """Scan the code segment and try to define functions.

    Args:
        analyzer (instance): analyzer instance to be used
        scs (list): list of (sark) code segments

    Notes
    -----
        An attempt to declare a function will occur if we find:
        1. A code line after a previous function - that looks like the beginning
           of a function of the same code type
        2. An unknown line after a previous function - that looks like the
           beginning of a function of the estimated code type
    """
    for sc in scs:
        analyzer.logger.info("Function scanning code segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea)
        search_func = False
        just_started = True
        line = sark.Line(sc.start_ea)
        while line.start_ea < sc.end_ea:
            # we don't care about data lines
            if line.is_data:
                line = line.next
                continue
            # check for code lines
            if line.is_code:
                try:
                    sark.Function(line.start_ea)
                    search_func = False
                    just_started = True
                    line = line.next
                    continue
                except sark.exceptions.SarkNoFunction:
                    if just_started:
                        just_started = False
                    else:
                        search_func = True
            # If we are searching for a function, simply continue
            if search_func or analyzer.switch_identifier.isSwitchCase(line.start_ea):
                line = line.next
                continue
            original_code_type = analyzer.codeType(line.start_ea)
            # If this is code, check that it matches the start of a function, and make it a function
            if line.is_code and analyzer.supportedCodeType(original_code_type) and \
                    analyzer.func_classifier.predictFunctionStartMixed(line.start_ea):
                if not ida_funcs.add_func(line.start_ea):
                    line = line.next
                else:
                    analyzer.logger.debug("Declared a function at: 0x%x", line.start_ea)
                continue
            # Code, and doesn't look like a function's start
            if line.is_code:
                # skip for now
                line = line.next
                continue
            # If unknown, check if a function and don't try to keep the same code type
            if line.is_unknown:
                guess_code_type = analyzer.func_classifier.predictFunctionStartType(line.start_ea)
                if analyzer.func_classifier.predictFunctionStart(line.start_ea, guess_code_type):
                    if original_code_type != guess_code_type:
                        analyzer.setCodeType(line.start_ea, line.start_ea + 1, guess_code_type)
                    if not ida_funcs.add_func(line.start_ea):
                        if original_code_type != guess_code_type:
                            analyzer.setCodeType(line.start_ea, line.start_ea + 1, original_code_type)
                        line = line.next
                    else:
                        analyzer.logger.debug("Declared a function at: 0x%x (Type %d, Local type %d)",
                                              line.start_ea, guess_code_type, original_code_type)
                    continue
            # otherwise, do nothing
            line = line.next
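# Hypothetical driver (sketch): `analyzer` is provided by the surrounding
# framework; here functionScan() is handed a single code segment by name.
functionScan(analyzer, [sark.Segment(name='.text')])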
def thumbsUp(analyzer, sc, aggressive=False, align=False):
    """Use various metrics in order to locate / fix code type transitions.

    Args:
        analyzer (instance): analyzer instance to be used
        sc (segment): (sark) code segment to work on
        aggressive (bool, optional): True iff should use aggressive heuristics (False by default)
        align (bool, optional): True iff should use align-based heuristics (False by default)

    Notes
    -----
        1. Convert (cancel) a code region that is contained inside the same function,
           and contains unexplored bytes (not a Chunk, and contains no functions)
        2. Convert (cancel) a code region that is misaligned and contains no functions
        3. Aggressive - Convert (cancel) a code region if the classifier doesn't agree on its start
        4. Aggressive - Convert (cancel) a code region if it contains illegal code lines / unknowns,
           and it contains no functions
        5. Aggressive - Convert (cancel) a pointed code region that could be misinterpreted,
           and that contains no functions (+ delete the fptr)
        6. Aggressive - Convert (cancel) a code region that begins on a function start,
           that could be misinterpreted, and that contains no functions
        7. Resize a code region that needs a little alignment
        8. In all of the heuristics, if the code region before us was OK and we merged with it,
           there is no need to check it again.
    """
    regions_fixed = 1
    line = sark.Line(sc.start_ea)
    regions = CodeRegions()
    first_round = True
    is_fptr_pointed = False
    code_aligned = False
    region_start = 0
    metric = None
    # Only continue if we changed something during the current round
    while regions_fixed > 0:
        regions_fixed = 0
        starting_new_region = True  # edge case for the first line in the section
        dummy_mode = False
        prev_code_type = None
        region_converted = False
        region_code_type = None
        if not first_round:
            interesting_regions = regions.changedRegions()
            analyzer.logger.debug("%d interesting regions", len(interesting_regions))
            # edge case, if we have nothing to do
            if len(interesting_regions) == 0:
                break
            line = sark.Line(interesting_regions[0].start)
            region_offset = -1
        # iterate the current region
        while line.start_ea < sc.end_ea:
            if not starting_new_region:
                # check if we found a transition
                new_code_type = analyzer.codeType(line.start_ea)
                # no change, just keep on
                if region_code_type == new_code_type:
                    if not dummy_mode:
                        metric.record(line)
                    line = line.next
                    continue
                # we found a transition
                region_end = line.start_ea
                region_converted = False
                if first_round:
                    region = CodeRegion(region_start, region_end, region_code_type)
                    regions.insert(region)
                # in dummy mode, don't do a thing
                if dummy_mode:
                    metrics = []
                    analyzer.logger.debug("Dummy region of code type %d in range 0x%x - 0x%x",
                                          region_code_type, region_start, region_end)
                # actually do something
                else:
                    # get the metrics
                    metric.stop(region_end)
                    # suffix / align metrics
                    align_metric = metric.alignMetric()
                    metrics = [metric] + ([align_metric] if align_metric is not None else [])
                    first_metric_region_fixed = True
                    aligned_region_fixed = True
                # Examine both metrics
                for code_metric in metrics:
                    contains_functions = code_metric.containsFunctions()
                    unknown_count, unknown_ratio = code_metric.unknowns()
                    illegal_count, illegal_ratio = code_metric.illegals()
                    has_unknown_or_illegal = unknown_count > 0 or illegal_count > 0
                    containing_function = code_metric.containingFunction()
                    start_function = code_metric.startFunction()
                    metric_region_start, metric_region_end = code_metric.borders()
                    metric_region_size = metric_region_end - metric_region_start
                    # special case for the last metric
                    if code_metric == align_metric:
                        aligned_region = True
                        metric_name = "Aligned"
                    else:
                        aligned_region = False
                        metric_name = "Regular"
                    # debug prints
                    analyzer.logger.debug("%s Metric: Code type %d used in range 0x%x - 0x%x (Pointed: %s, Contains functions: %s)",
                                          metric_name, region_code_type, metric_region_start, metric_region_end,
                                          str(is_fptr_pointed), contains_functions)
                    if unknown_count > 0:
                        analyzer.logger.debug("Unknowns %d / %d Overall size = %f%%",
                                              unknown_count, metric_region_size, unknown_ratio * 100)
                    if illegal_count > 0:
                        analyzer.logger.debug("Illegals %d / %d Overall size = %f%%",
                                              illegal_count, metric_region_size, illegal_ratio * 100)
                    # Check if we can flip this region
                    # 1. The entire code region is contained inside the same function, and contains
                    #    unexplored bytes (not a Chunk, and contains no functions)
                    if containing_function is not None and \
                            containing_function.start_ea < metric_region_start and \
                            metric_region_end <= containing_function.end_ea and \
                            has_unknown_or_illegal and not contains_functions:
                        analyzer.logger.info("Code region is contained inside a single function - cancel it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 2. Misaligned region
                    elif not aligned_region and not code_aligned and not contains_functions:
                        analyzer.logger.info("Misaligned code region without any functions - cancel it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 3. Aggressive - Classifier doesn't agree about this region's start
                    elif aggressive and \
                            not aligned_region and \
                            analyzer.func_classifier.predictFunctionStartType(metric_region_start) != region_code_type and \
                            analyzer.func_classifier.predictFunctionStartType(metric_region_end) == new_code_type and \
                            not contains_functions:
                        analyzer.logger.info("Classifier doesn't agree about the code region's start, and it has no functions - cancel it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 4. Aggressive - Unknowns and no functions
                    elif aggressive and \
                            has_unknown_or_illegal and not contains_functions:
                        analyzer.logger.info("Code region contains unexplored bytes, and it has no functions - fixing it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 5. Aggressive - pointed region that could be misinterpreted + no functions
                    elif aggressive and \
                            not aligned_region and \
                            is_fptr_pointed and \
                            prev_code_type is not None and \
                            ((not analyzer.func_classifier.predictFunctionEnd(metric_region_start, prev_code_type)) or
                             ((metric_region_size <= analyzer.addressSize()) and not analyzer.func_classifier.predictFunctionEnd(metric_region_end, region_code_type)) or
                             ((metric_region_size <= analyzer.addressSize()) and not analyzer.func_classifier.predictFunctionStart(metric_region_end, new_code_type)) or
                             analyzer.func_classifier.predictFunctionStart(metric_region_start, new_code_type)) and \
                            not contains_functions:
                        analyzer.logger.info("Code region is fptr pointed, classifier says it's not a function end, and it has no functions - cancel it")
                        # delete the fptr
                        analyzer.fptr_identifier.deleteFptr(metric_region_start, region_code_type)
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 6. Aggressive - region on function start, that could be misinterpreted + no functions
                    elif aggressive and \
                            not aligned_region and \
                            start_function is not None and metric_region_start == start_function.start_ea and \
                            analyzer.func_classifier.predictFunctionStart(metric_region_start, new_code_type) and \
                            not contains_functions:
                        analyzer.logger.info("Code region is a function start, classifier prefers a different code type, and it has no functions - cancel it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 7. Needs a little alignment
                    elif not aligned_region and not code_aligned:
                        analyzer.logger.debug("Code region is not aligned, align it down (resize)")
                        resized_start = analyzer.alignTransitionAddress(metric_region_start, region_code_type)
                        resizeRegion(analyzer, metric_region_start, metric_region_end,
                                     resized_start, metric_region_end)
                        regions.resizeStart(region, resized_start)
                        regions_fixed += 1
                    # Nothing for now
                    else:
                        if aligned_region:
                            aligned_region_fixed = False
                        else:
                            first_metric_region_fixed = False
                    # Aligned region should start with a function
                    if aligned_region and aligned_region_fixed:
                        ida_funcs.add_func(metric_region_start)
                    # Break the loop and start the new region
                    if first_metric_region_fixed:
                        break
                # if our region was converted, there is no need to scan the current region (partial data)
                dummy_mode = region_converted and first_round
                # new region - check if finished the list
                if not first_round:
                    region_offset += 1
                    if region_offset >= len(interesting_regions):
                        break
                    # check if we need to skip the next one too
                    if region_converted and region.next == interesting_regions[region_offset] and \
                            interesting_regions[region_offset].code_type == new_code_type:
                        region_offset += 1
                        if region_offset >= len(interesting_regions):
                            break
                    region = interesting_regions[region_offset]
                    line = sark.Line(region.start)
                    region_start = line.start_ea
                    if region.prev is not None:
                        prev_code_type = region.prev.code_type
                    else:
                        prev_code_type = None
                # the simple case
                else:
                    # the code type could have changed, so we re-sample it
                    if region_code_type is not None:
                        prev_code_type = analyzer.codeType(region_start)
                    region_start = line.start_ea
            # get the current code type (even in dummy mode)
            region_code_type = analyzer.codeType(line.start_ea)
            if not dummy_mode:
                code_aligned = analyzer.isCodeTransitionAligned(region_start)
            starting_new_region = False
            # measure the metrics
            metric = CodeMetric(analyzer, region_start, measure_align=align)
            metric.start(line)
            # check if started because of one of our function pointers
            is_fptr_pointed = analyzer.fptr_identifier.isPointedFunction(region_start)
            # advance to the next line
            if first_round:
                line = line.next
        # log the result
        analyzer.logger.info("Fixed %d code regions in this iteration", regions_fixed)
        first_round = False
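# A possible invocation (sketch, assuming the surrounding framework's
# `analyzer`): a conservative pass first, then an aggressive + aligned one.
sc = sark.Segment(name='.text')
thumbsUp(analyzer, sc)
thumbsUp(analyzer, sc, aggressive=True, align=True)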
def get_cur_type(ea, reg):
    # `get_reg_user_name` and `type_name` are defined elsewhere in this module
    opnds = sark.Line(ea).insn.operands
    for i, opnd in enumerate(opnds):
        if '[%s]' % get_reg_user_name(ea, reg) in opnd.text:
            str_id = ida_struct.get_struc_id(type_name)
            idc.op_stroff(ea, i, str_id, 0)
def rename_function_by_aString_surrounding_call(aString, funcName,
                                                xref_func=first_xref,
                                                count_max=10,
                                                filtered_funcs=[],
                                                count_filtered_funcs=0,
                                                head_func=prev_head):
    global ERROR_MINUS_1
    if name_to_addr(funcName) is not None:
        logmsg("%s already defined" % funcName)
        return True
    if filtered_funcs and count_filtered_funcs > 0:
        logmsg("ERROR: Only one argument is supported")
        return False
    # required functions to locate funcName
    for filtered_name in filtered_funcs:
        if name_to_addr(filtered_name) is None:
            logmsg("required function: %s missing, can't locate %s" % (filtered_name, funcName))
            return False
    addr_str = name_to_addr(aString)
    if addr_str is None:
        return False
    addr_str_used = xref_func(addr_str)
    if addr_str_used is None:
        return False
    try:
        sark.Function(ea=addr_str_used)
    except sark.exceptions.SarkNoFunction:
        logmsg("No function at 0x%x when handling %s" % (addr_str_used, aString))
        return False
    count = 0
    e = addr_str_used
    bFound = False
    while count <= count_max:
        e = head_func(e)
        line = sark.Line(e)
        #print(line)
        try:
            insn = line.insn
        except sark.exceptions.SarkNoInstruction:
            logmsg("data in the middle of instructions at 0x%x, not supported yet" % e)
            return False
        if insn.mnem == "BL":
            if len(insn.operands) != 1:
                logmsg("Wrong number of operands for BL at 0x%x" % e)
                return False
            curr_func_name = insn.operands[0].text
            # do we need to skip this "BL" or are we done?
            bFiltered = False
            if count_filtered_funcs > 0:
                logmsg("skipping filtered due to count: %d at 0x%x" % (count_filtered_funcs, e))
                count_filtered_funcs -= 1
                bFiltered = True
            else:
                for filtered_name in filtered_funcs:
                    if curr_func_name == filtered_name:
                        logmsg("skipping filtered name: %s at 0x%x" % (filtered_name, e))
                        bFiltered = True
                        break
            if bFiltered:
                count += 1
                continue
            func_addr = name_to_addr(curr_func_name)
            if func_addr is None:
                return False
            rename_address(func_addr, funcName)
            logmsg("%s = 0x%x" % (funcName, func_addr))
            bFound = True
            break
        count += 1
    if not bFound:
        logmsg("ERROR: %s not found" % funcName)
        return False
    return True
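# Hypothetical call (both names are placeholders): locate the string, walk
# backwards from its first xref, and rename the first unfiltered BL target.
rename_function_by_aString_surrounding_call("aBadPacketLen", "parse_packet")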
import sark
import idaapi
import idautils

anim = sark.structure.get_struct('TroggSpearImage')
end_of_frame = sark.structure.get_struct("EndOfAnimFrame")
dataseg = sark.Segment(name='dataseg').ea
# anim_offset = idaapi.get_word(sark.Line(ea=dataseg + idautils.cpu.di + 2).ea)
current_position = sark.Line().ea
# current_byte = idaapi.get_byte(current_position)
done = False
while not done:
    current_byte = idaapi.get_byte(current_position)
    if current_byte == 0xff:
        print("applying EndOfAnimFrame")
        idaapi.doStruct(current_position, 2, end_of_frame)
        next_byte = idaapi.get_byte(current_position + 1)
        if next_byte == 0xff:
            done = True
        current_position += 2
    elif current_byte < 0x80:
        # print(current_byte)
        print("applying AnimationFrame")
        test = idaapi.doStruct(current_position, 6, anim)
        # print(test)
        current_position += 6
        # print(hex(current_position - dataseg))
    else:
        done = True
def clear_all_highlights(self):
    for ea in self.lines:
        sark.Line(ea=ea).color = None
    self.lines.clear()
def is_ret(ea):
    """Check if the instruction at `ea` is a RET instruction."""
    return sark.Line(ea).insn.is_ret
def is_call(ea):
    """Check if the instruction at `ea` is a CALL instruction."""
    return sark.Line(ea).insn.is_call
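# Quick sketch: classify the instruction under the cursor.
ea = idc.here()
if is_call(ea):
    print("0x%x: call" % ea)
elif is_ret(ea):
    print("0x%x: return" % ea)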
# Reconstructed wrapper: the original fragment returns R0 after emulating
# `code` with Unicorn, and is invoked below as `simulate(code)`.
def simulate(code):
    mu = Uc(UC_ARCH_ARM, UC_MODE_THUMB)
    mu.mem_map(ADDRESS, 2 * 1024 * 1024)
    mu.mem_write(ADDRESS, code)
    mu.emu_start(ADDRESS | 1, ADDRESS + len(code))
    return mu.reg_read(UC_ARM_REG_R0)

left = []
system_addr = 0x0933C
printf_addr = 0x9174
sprintf_addr = 0x9470
address = sprintf_addr
my_line = sark.Line(address)
for xref in my_line.xrefs_to:
    line = sark.Line(xref.frm)
    # collect the 'mov r0, #const' instructions leading up to the call
    code = ''
    for i in range(20):
        if detectMovR0Const(line):
            code = line.bytes + code
        line = line.prev
    r0 = simulate(code)
    print '---', r0
    arg = sark.Line(r0)
    if arg.is_string:
        print arg
    else:
        left.append(sark.Line(xref.frm))
def _xrefs_from(function_ea):
    try:
        return sark.Function(function_ea).xrefs_from
    except exceptions.SarkNoFunction:
        return sark.Line(function_ea).xrefs_from
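# Sketch: _xrefs_from() degrades gracefully for addresses outside any
# function, so it is safe on stray code or data lines as well.
for xref in _xrefs_from(idc.here()):
    print("0x%x -> 0x%x" % (xref.frm, xref.to))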
import sark
import idaapi
import idautils

anim = sark.structure.get_struct('DrawData')

while idaapi.is_debugger_on():
    dataseg = sark.Segment(name='dataseg').ea
    anim_offset = sark.Line(ea=dataseg + idautils.cpu.di).ea
    anim_addr = dataseg + anim_offset
    idaapi.doStruct(anim_offset, 0x24, anim)
    idaapi.jumpto(anim_offset)
    idaapi.continue_process()
    idaapi.wait_for_next_event(2, 10000)
def get_regs_in_operand(self, ea, operand_idx):
    iorb = InsnOpndRegBits()
    r_reg2idx = dict()
    r_reg2idx['LR'] = 14
    r_reg2idx['SP'] = 13
    r_reg2idx['PC'] = 15
    for i in range(12 + 1):
        r_reg2idx['R%d' % i] = i
    d_reg2idx = dict()
    for i in range(31 + 1):
        d_reg2idx['D%d' % i] = i
    r_idx2reg = {}
    for k in r_reg2idx.keys():
        r_idx2reg[r_reg2idx[k]] = k
    d_idx2reg = {}
    for k in d_reg2idx.keys():
        d_idx2reg[d_reg2idx[k]] = k
    opnd = sark.Line(ea).insn.operands[operand_idx]
    # can't be sure that sark got all regs - for example,
    # 'ld16.bu d0, [a12]' doesn't recognise a12
    all_regs = self.get_reg_list()
    operand_res = []
    # R0
    operand_res += [(r'^([^,\[\]]+)$', UsageBits.OP_UK)]
    # LR!
    operand_res += [(r'^([^,\[\]]+)\!$', UsageBits.OP_RW)]
    # [R0]
    operand_res += [(r'^\[([^,\[\]]+)\]$', UsageBits.OP_RD)]
    # [R0,#0x20]
    operand_res += [(r'^\[([^,\[\]]+),.*\]$', UsageBits.OP_RD)]
    # R0,LSR#2
    operand_res += [(r'^([^,\[\]]+),LS[RL]#[0-9]+$', UsageBits.OP_RD)]
    # [r12, 5]!
    operand_res += [(r'\[([^,]+)\,.*\]\!', UsageBits.OP_RW)]
    # [R0],#0x54
    operand_res += [(r'\[([^,]+)\],#.+$', UsageBits.OP_RW)]
    for operand_re, op_bits in operand_res:
        m = re.match(operand_re, opnd.text)
        if m is None:
            continue
        reg_set = set()
        e_reg = m.group(1)
        e_reg = RegName(ea, all_regs).canon(e_reg)
        if e_reg is not None:
            reg_set |= {e_reg}
        # Didn't recognise any such reg. Probably false positive. Try next
        else:
            continue
        for reg in reg_set:
            iorb.set_usage_bits(reg, operand_idx, op_bits | UsageBits.USAGE_EXPLICIT)
        for reg in (self.reg_expand(ea, reg_set) ^ reg_set):
            iorb.set_usage_bits(reg, operand_idx, op_bits | UsageBits.USAGE_IMPLICIT_COMPOSITE)
        return iorb
    # handle difficult operands
    operand_re = r'^\{([^,-]+([,-][^,-]+)+)\}$'  # {R4-R8,LR}
    op_bits = UsageBits.OP_UK
    m = re.match(operand_re, opnd.text)
    canon_list = self.get_reg_list()
    if m is not None:
        reg_set = set()
        reg_explicit_set = set()
        elem_list = m.group(1)
        for r in elem_list.split(','):
            # Not a range
            if '-' not in r:
                e_reg = RegName(ea, canon_list).canon(r)
                if e_reg is not None:
                    reg_set |= {e_reg}
                    reg_explicit_set |= {e_reg}
            # Is a range
            else:
                s, e = r.split('-')
                s = RegName(ea, canon_list).canon(s)
                e = RegName(ea, canon_list).canon(e)
                if (s is not None) and (e is not None):
                    if (s in r_reg2idx) and (e in r_reg2idx):
                        reg_explicit_set |= {s}
                        reg_explicit_set |= {e}
                        s_i = r_reg2idx[s]
                        e_i = r_reg2idx[e]
                        for i in range(s_i, e_i + 1):
                            reg_set |= {r_idx2reg[i]}
                    elif (s in d_reg2idx) and (e in d_reg2idx):
                        reg_explicit_set |= {s}
                        reg_explicit_set |= {e}
                        s_i = d_reg2idx[s]
                        e_i = d_reg2idx[e]
                        for i in range(s_i, e_i + 1):
                            reg_set |= {d_idx2reg[i]}
                    else:
                        logger.error('range with unknown - {}'.format(r))
        for reg in reg_set:
            if reg in reg_explicit_set:
                iorb.set_usage_bits(reg, operand_idx, op_bits | UsageBits.USAGE_EXPLICIT)
            else:
                iorb.set_usage_bits(reg, operand_idx, op_bits | UsageBits.USAGE_IMPLICIT_RANGE)
        for reg in (self.reg_expand(ea, reg_set) ^ reg_set):
            iorb.set_usage_bits(reg, operand_idx, op_bits | UsageBits.USAGE_IMPLICIT_COMPOSITE)
        return iorb
    return iorb
logger.addHandler(handler)
logger.setLevel(logging.WARNING)

try:
    import idautils
    import idaapi
    import sark

    ram_segment = None
    rom_segment = None
    for segment in sark.segments():
        if segment.name == 'RAM' or segment.name == '.data':
            ram_segment = segment
        elif segment.name == 'ROM' or segment.name == '.text':
            rom_segment = segment

    for (name_ea, name) in idautils.Names():
        if not name.startswith("USART"):
            continue
        logger.debug("looking for xrefs to %s @ 0x%x" % (name, name_ea))
        Wait()
        for xref in sark.Line(name_ea).xrefs_to:
            print "%s <-- 0x%x" % (name, xref.frm)
except:
    exc_type, exc_value, exc_traceback = sys.exc_info()
    logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))

idascript.exit()
register_group = all_bases.get(key)
base_name = register_group.attrib['name']
name_in_module = register_group.attrib['name-in-module']
base = int(register_group.attrib['offset'], 0)
for register in root.findall(".//modules/module/register-group[@name='%s']/register" % name_in_module):
    name = register.attrib['name']
    offset = int(register.attrib['offset'], 0)
    caption = register.attrib['caption']
    ioport_name = "%s_%s" % (base_name, name)
    # the ATxmega128a4u doesn't have a register file mapped at 0x0 in RAM, which
    # is being forced by the avr IDA module; correct the location of all the
    # mapped IO ports
    wrong_line = sark.Line(idaapi.get_name_ea(0, ioport_name))
    right_line = sark.Line(wrong_line.ea - 0x20)
    wrong_line.name = ""
    wrong_line.comments.repeat = ""
    offset = right_line.ea - r0_address
    # actually renaming the data locations in the first 0x20 would make the IDA
    # disassembly look bonkers, because it uses the names of those data
    # locations for its register names; just append a comment about the ioport
    # location in question to the first 0x20
    if offset <= 0x20:
        previous_comment = right_line.comments.repeat
        ioport_comment = "io:%s" % ioport_name
        if previous_comment is None:
            right_line.comments.repeat = ioport_comment
        else:
            right_line.comments.repeat = previous_comment + " " + ioport_comment
if not logger.handlers:
    handler = logging.StreamHandler(stream=sys.stdout)
    logger.addHandler(handler)

try:
    import idautils
    import idaapi
    import sark

    data_vectors = {
        'r27': 'XH', 'r26': 'XL',
        'r29': 'YH', 'r28': 'YL',
        'r31': 'ZH', 'r30': 'ZL',
    }
    for vector_register, alias in data_vectors.items():
        register_line = sark.Line(idaapi.get_name_ea(0, vector_register))
        register_line.name = alias
        register_line.comments.repeat = "alias:%s" % vector_register
except:
    exc_type, exc_value, exc_traceback = sys.exc_info()
    logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))

idascript.exit()
def dataScan(analyzer, scs):
    """Scan the code segments for orphan data blobs that represent analysis errors.

    Args:
        analyzer (instance): analyzer instance to be used
        scs (list): list of (sark) code segments
    """
    # First Scan - unreffed data chunks inside functions ==> should be converted to code
    for sc in scs:
        first_line = None
        end_line = None
        for line in sc.lines:
            # After the first, the rest of the lines should have 0 crefs
            if first_line is not None and ((not line.is_data) or len(list(line.drefs_to)) > 0 or
                                           len(list(line.crefs_to)) > 0):
                end_line = line
            # we only care about data lines with a single cref from the previous line
            elif first_line is None and ((not line.is_data) or len(list(line.drefs_to)) > 0 or
                                         len(list(line.crefs_to)) != 1 or
                                         sark.Line(list(line.crefs_to)[0]).next != line):
                end_line = line
            # don't mark switch entries
            elif analyzer.switch_identifier.isSwitchEntry(line.start_ea):
                end_line = line
            # Finally, check if it could be a function of some type
            elif first_line is None:
                first_line = line
                continue
            # Found an adjacent suitable line
            else:
                continue
            # Now check if we found something (end_line is always != None at this point)
            if first_line is not None and end_line is not None:
                chunk_start = first_line.start_ea
                chunk_end = end_line.start_ea
                # check that we can deduce anything on this current code type
                if not analyzer.supportedCodeType(analyzer.codeType(chunk_start)):
                    continue
                # check that the chunk before us is not the end of a function
                if analyzer.func_classifier.predictFunctionEnd(chunk_start):
                    # shouldn't really happen, do nothing in this case
                    pass
                # data chunk in the middle of a function, and not at its end - convert it to code
                else:
                    analyzer.logger.debug("In-Function data chunk at: 0x%x - 0x%x (%d)",
                                          chunk_start, chunk_end, chunk_end - chunk_start)
                    ida_bytes.del_items(chunk_start, 0, chunk_end - chunk_start)
                    idc.create_insn(chunk_start)
            # reset the vars
            first_line = None
            end_line = None
    # Second scan - unreffed data chunks outside of functions ==> new functions, possibly of different code type
    size_limit = analyzer.func_classifier.functionStartSize()
    analyzer.logger.debug("Size limit for data scan is: %d", size_limit)
    conversion_candidates = []
    # recon pass
    for sc in scs:
        first_line = None
        end_line = None
        for line in sc.lines:
            # we only care about data lines without xrefs
            if (not line.is_data) or len(list(line.crefs_to)) > 0 or len(list(line.drefs_to)) > 0:
                end_line = line
            # check if it's big enough for the classifier
            elif line.size < size_limit:
                end_line = line
            # check if it looks like a string
            elif analyzer.str_identifier.isLocalAsciiString(line.start_ea, check_refs=False):
                analyzer.str_identifier.defineAsciiString(line.start_ea)
                end_line = line
            # make sure it isn't a switch entry
            elif analyzer.switch_identifier.isSwitchEntry(line.start_ea):
                end_line = line
            # Finally, check if it could be a function of some type
            elif first_line is None:
                first_line = line
                continue
            # Found an adjacent suitable line
            else:
                continue
            # Now check if we found something (end_line is always != None at this point)
            if first_line is not None and end_line is not None:
                chunk_start = first_line.start_ea
                chunk_end = end_line.start_ea
                guess_code_type = analyzer.func_classifier.predictFunctionStartType(chunk_start)
                original_code_type = analyzer.codeType(chunk_start)
                analyzer.logger.debug("Found a data chunk at: 0x%x - 0x%x (%d), (Type %d, Local type %d)",
                                      chunk_start, chunk_end, chunk_end - chunk_start,
                                      guess_code_type, original_code_type)
                # Check if this is the beginning of a function
                if analyzer.func_classifier.predictFunctionStart(chunk_start, guess_code_type):
                    conversion_candidates.append((chunk_start, chunk_end, guess_code_type, original_code_type))
            # reset the vars
            first_line = None
            end_line = None
    # conversion pass
    for chunk_start, chunk_end, guess_code_type, original_code_type in conversion_candidates:
        analyzer.logger.info("Found an isolated data chunk at: 0x%x - 0x%x (%d), (Type %d, Local type %d)",
                             chunk_start, chunk_end, chunk_end - chunk_start,
                             guess_code_type, original_code_type)
        ida_bytes.del_items(chunk_start, 0, chunk_end - chunk_start)
        if original_code_type != guess_code_type:
            analyzer.setCodeType(chunk_start, chunk_end, guess_code_type)
        idc.plan_and_wait(chunk_start, chunk_end)
        ida_funcs.add_func(chunk_start)
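# Hypothetical driver (sketch): run dataScan() over the supported code
# segments, mirroring the functionScan() driver above.
dataScan(analyzer, [sark.Segment(name='.text')])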
def locateDataPtrs(self, scs, sds):
    """Locate all data / code fptrs in the given set of segments.

    Args:
        scs (list): list of (sark) code segments
        sds (list): list of (sark) data segments
    """
    local_ref_ptrs = defaultdict(set)
    seen_list = []
    approved_ptrs = []
    approved_eas = set()
    ptrs_mappings = defaultdict(set)
    marked_artifacts = []
    for sd in sds:
        cur_ea = pad(sd.start_ea, self._analyzer.data_fptr_alignment)
        while cur_ea < sd.end_ea:
            line = sark.Line(cur_ea)
            if line.is_string:
                cur_ea += pad(line.size, self._analyzer.data_fptr_alignment)
                continue
            # check for a function ptr
            value = self._analyzer.parseAdderss(cur_ea)
            # make sure it is valid
            if self.isValidCodePtr(value, scs):
                func_value = self._analyzer.cleanPtr(value)
                code_type = self._analyzer.ptrCodeType(value)
                # is seen
                if func_value in local_ref_ptrs:
                    local_ref_ptrs[func_value].add(code_type)
                    ptrs_mappings[func_value].add(cur_ea)
                    self._analyzer.logger.debug("Located a fptr from 0x%x to 0x%x (type: %d) - Undeclared function",
                                                cur_ea, func_value, code_type)
                    if self.isPrintableAddress(value):
                        self._analyzer.logger.debug("Looks like a printable FP: 0x%x", value)
                    approved_ptrs.append((cur_ea, value))
                    approved_eas.add(cur_ea)
                    seen_list.append((cur_ea, True))
                    marked_artifacts.append((cur_ea, True))
                # is start of real function, from the correct type
                elif self._analyzer.codeType(func_value) == code_type and \
                        self._analyzer.func_classifier.isFuncStart(func_value):
                    local_ref_ptrs[func_value].add(code_type)
                    ptrs_mappings[func_value].add(cur_ea)
                    self._analyzer.logger.debug("Located a fptr from 0x%x to 0x%x (type: %d) - Existing function",
                                                cur_ea, func_value, code_type)
                    approved_ptrs.append((cur_ea, value))
                    approved_eas.add(cur_ea)
                    seen_list.append((cur_ea, True))
                    marked_artifacts.append((cur_ea, True))
                # is start of function
                elif self._analyzer.func_classifier.predictFunctionStartMixed(func_value, known_type=code_type):
                    local_ref_ptrs[func_value].add(code_type)
                    ptrs_mappings[func_value].add(cur_ea)
                    self._analyzer.logger.debug("Located a fptr from 0x%x to 0x%x (type: %d) - Undeclared function",
                                                cur_ea, func_value, code_type)
                    if self.isPrintableAddress(value):
                        self._analyzer.logger.debug("Looks like a printable FP: 0x%x", value)
                    approved_ptrs.append((cur_ea, value))
                    approved_eas.add(cur_ea)
                    seen_list.append((cur_ea, True))
                    marked_artifacts.append((cur_ea, True))
                # only a candidate - maybe it will be approved later
                else:
                    seen_list.append((cur_ea, False))
                    # check for an analysis problem
                    if len(list(line.drefs_from)) > 0:
                        idc.del_dref(cur_ea, value)
                        idc.del_dref(cur_ea, func_value)
            # Check for a valid data pointer
            elif self.isValidDataPtr(value, sds):
                # make it a data pointer
                self._analyzer.markDataPtr(cur_ea, value)
                self._analyzer.logger.debug("Located a data ptr from 0x%x to 0x%x", cur_ea, value)
                marked_artifacts.append((cur_ea, False))
                marked_artifacts.append((value, False))
            # continue forward
            cur_ea += pad(self._analyzer.addressSize(), self._analyzer.data_fptr_alignment)
    # check if there is some pattern we can use to find more fptrs
    chosen_threshold = 7
    cur_window = []
    window_index = 0
    # NOTE: this step is too risky if there are Read-Only data constants inside the text section
    while window_index < len(seen_list) and not self._analyzer.isCodeMixedWithData():
        # If we didn't reach the end, and
        # 1. The window doesn't have enough "True" pointers
        # 2. The window contains only "True" pointers
        # Slide the window onward
        while window_index < len(seen_list) and \
                (len(list(filter(lambda x: x[1], cur_window))) < chosen_threshold or
                 len(list(filter(lambda x: not x[1], cur_window))) == 0):
            # If we are above the threshold (meaning that cond #2 applies), kick out the first ptr (which is a "True" ptr)
            if chosen_threshold < len(list(filter(lambda x: x[1], cur_window))):
                cur_window = cur_window[1:]
            # Add a new pointer at the end of our window
            cur_window.append(seen_list[window_index])
            window_index += 1
        # Sanity check: check if we have a candidate
        if window_index == len(seen_list) and len(list(filter(lambda x: not x[1], cur_window))) == 0:
            break
        # measure the deltas
        chosen_window = list(filter(lambda x: x[1], cur_window))
        # deltas between the "True" pointers
        chosen_deltas = set()
        for i in range(len(chosen_window) - 1):
            chosen_deltas.add(chosen_window[i + 1][0] - chosen_window[i][0])
        # All possible deltas between adjacent pointers
        seen_deltas = set()
        for i in range(len(cur_window) - 1):
            seen_deltas.add(cur_window[i + 1][0] - cur_window[i][0])
        new_chosen = None
        # check for a pattern
        if len(seen_deltas) <= len(chosen_deltas):
            new_chosen = list(filter(lambda x: not x[1], cur_window))[0]
        # check if the window starts with a candidate, that is right near a "True" pointer
        elif not cur_window[0][1]:
            first_seen = cur_window[0]
            seen_addr = first_seen[0]
            for candidate in [seen_addr - self._analyzer.data_fptr_alignment,
                              seen_addr + self._analyzer.data_fptr_alignment]:
                if candidate in approved_eas:
                    new_chosen = first_seen
                    break
        # check if found a match
        if new_chosen is not None:
            # re-insert ourselves with our new values
            our_index = cur_window.index(new_chosen)
            cur_window = cur_window[:our_index] + [(new_chosen[0], True)] + cur_window[our_index + 1:]
            # mark the pointer
            cur_ea = new_chosen[0]
            value = self._analyzer.parseAdderss(cur_ea)
            func_value = self._analyzer.cleanPtr(value)
            code_type = self._analyzer.ptrCodeType(value)
            local_ref_ptrs[func_value].add(code_type)
            ptrs_mappings[func_value].add(cur_ea)
            approved_ptrs.append((cur_ea, value))
            marked_artifacts.append((cur_ea, True))
            approved_eas.add(cur_ea)
            self._analyzer.logger.debug("Located new fptr from 0x%x to 0x%x (type: %d)",
                                        cur_ea, func_value, code_type)
        # advance the window
        cur_window = cur_window[1:]
    # filter the pointers (we could have false positives)
    disqualified_addresses = set()
    for cur_ea, raw_address in approved_ptrs:
        fixed_address = self._analyzer.cleanPtr(raw_address)
        disqualified = False
        # check if already disqualified
        if fixed_address not in ptrs_mappings:
            continue
        # Several code types for the same address, we take no chances and remove them all
        if len(local_ref_ptrs[fixed_address]) != 1:
            disqualified = True
        # Check if the code type is even legal for that address
        else:
            wanted_code_type = list(local_ref_ptrs[fixed_address])[0]
            orig_code_type = self._analyzer.codeType(fixed_address)
            ida_bytes.del_items(fixed_address, 0, self._analyzer.addressSize())
            if orig_code_type != wanted_code_type:
                self._analyzer.setCodeType(fixed_address, fixed_address + 4, wanted_code_type)
            if idc.create_insn(fixed_address) == 0:
                disqualified = True
            # Always clean after ourselves
            ida_bytes.del_items(fixed_address, 0, self._analyzer.addressSize())
            if orig_code_type != wanted_code_type:
                self._analyzer.setCodeType(fixed_address, fixed_address + self._analyzer.addressSize(),
                                           orig_code_type)
        # We are OK, can continue
        if not disqualified:
            continue
        # Found a false function pointer
        # Be cautious with the removals, we could have duplicates
        if fixed_address in self._ptrs_mappings:
            self._ptrs_mappings.pop(fixed_address)
        disqualified_addresses.add(raw_address)
        marked_artifacts.remove((cur_ea, True))
        # no need to remove from local_ref_ptrs, as the global variable only gets the approved values
        # no need to remove from approved_eas, as this data set isn't used anymore
        self._analyzer.logger.debug("Disqualified (code) pointer 0x%08x from 0x%08x (type %d, seen types %s)",
                                    fixed_address, cur_ea, wanted_code_type, local_ref_ptrs[fixed_address])
    # Now filter them based on scoped range from other artifacts
    marked_artifacts.sort(key=lambda x: x[0])
    cur_index = 0
    prev_artifact = None
    while cur_index < len(marked_artifacts) - 1:
        cur_ea, is_fptr = marked_artifacts[cur_index]
        next_ea, _ = marked_artifacts[cur_index + 1]
        # Only check ourselves against the next in line
        if cur_ea + FPTR_LOCALITY_RANGE < next_ea:
            if prev_artifact is None and is_fptr:
                # we should be disqualified
                raw_address = self._analyzer.parseAdderss(cur_ea)
                wanted_code_type = self._analyzer.ptrCodeType(raw_address)
                fixed_address = self._analyzer.cleanPtr(raw_address)
                # Be cautious with the removals, we could have duplicates
                if fixed_address in self._ptrs_mappings:
                    self._ptrs_mappings.pop(fixed_address)
                disqualified_addresses.add(raw_address)
                self._analyzer.logger.debug("Disqualified (scope) pointer 0x%08x from 0x%08x (type %d)",
                                            fixed_address, cur_ea, wanted_code_type)
            # set the prev artifact
            prev_artifact = None
            # check the next element
            cur_index += 1
        # We are linking to the next element, so it is legit too
        else:
            prev_artifact = next_ea
            cur_index += 1
    # mark the pointers
    for cur_ea, raw_address in filter(lambda x: x[1] not in disqualified_addresses, approved_ptrs):
        self._ref_ptrs[self._analyzer.cleanPtr(raw_address)] = self._analyzer.ptrCodeType(raw_address)
        self._analyzer.markCodePtr(cur_ea, raw_address)
    # print some results
    self._analyzer.logger.info("Found %d different potential function pointer destinations",
                               len(self._ref_ptrs))
def locateLocalConstants(self, scs, sds):
    """Locate and define all of the local strings / numeric constants that match our observed pattern.

    Args:
        scs (list): List of (sark) code segments.
        sds (list): List of (sark) data segments.
    """
    self._analyzer.logger.info("Locating local strings / constants in the code sections")
    for sc in scs:
        cur_ea = pad(sc.startEA, self._local_alignment)
        while cur_ea < sc.endEA:
            # check for a data constant
            if self.isDataConstant(cur_ea):
                # check for a string (refs already checked)
                if self._analyzer.str_identifier.isLocalAsciiString(cur_ea, check_refs=False):
                    length = self._analyzer.str_identifier.defineAsciiString(cur_ea)
                    padded_length = pad(length, self._local_alignment)
                    if padded_length != length:
                        idc.MakeUnknown(cur_ea + length, padded_length - length, 0)
                        idc.MakeData(cur_ea + length, 0, padded_length - length, 0)
                    cur_ea += padded_length
                # This means it is a constant
                else:
                    if self._local_pad is None:
                        idc.MakeData(cur_ea, 0, self._local_alignment, 0)
                    else:
                        # check the size of the constant using the byte padding
                        for offset in xrange(self._local_alignment - 1, -1, -1):
                            if idc.Byte(cur_ea + offset) != self._local_pad:
                                break
                        # prepare the bytes
                        idc.MakeUnknown(cur_ea, self._local_alignment, 0)
                        # the data constant - try to make it pretty
                        if offset + 1 == 2:
                            idc.MakeWord(cur_ea)
                        elif offset + 1 == 4:
                            idc.MakeDword(cur_ea)
                        elif offset + 1 == 8:
                            idc.MakeQword(cur_ea)
                        else:
                            idc.MakeData(cur_ea, 0, offset + 1, 0)
                        # the padding
                        idc.MakeData(cur_ea + offset + 1, 0, self._local_alignment - offset + 1, 0)
                        # Now check for a pointer (only supports code pointers for now)
                        if offset + 1 == self._analyzer.addressSize():
                            value = self._analyzer.parseAdderss(cur_ea)
                            # only support pointers inside our local segment (more probable)
                            if sc.startEA <= value and value < sc.endEA:
                                self._analyzer.markCodePtr(cur_ea, value, aggressive=False)
                            # try a pointer to a declared string
                            else:
                                for sd in sds:
                                    if sd.startEA <= value and value <= sd.endEA:
                                        line = sark.Line(value)
                                        if line.is_string and line.startEA == value:
                                            self._analyzer.markDataPtr(cur_ea, value, aggressive=False)
                                        break
                    # now move onward
                    cur_ea += self._local_alignment
            # found nothing, move on
            else:
                cur_ea += self._local_alignment
spritesheets = {
    # (leading entries elided in the original snippet)
    8: 'kn3.ob',
    12: 'kn4.ob',
    16: 'kn4.ob',
}
collide_type = {
    0: 'NonSolid',
    1: 'Collidee',
    2: 'Collider',
    16: 'Vm',
    128: 'Blood',
    144: 'BloodStain',
}
line = sark.Line()
next_line = True
while next_line:
    if line.disasm.startswith("AnimationFrame"):
        sprite, img_num, y, collide, x = unpack('<4BH', line.bytes)
        y = byte_to_sign(y)
        x = hex_to_sign(x)
        sprite = spritesheets[sprite]
        collide = collide_type[collide]
        #test = "ImagePosition('{}', {}, {}, {}, {}),".format(sprite, img_num, y, x, collide)
        test = " - {{sheet: {}, image: {}, y: {}, x: {}, image_type:{}}}".format(sprite, img_num, y, x, collide)
        print test
        line = line.next
    elif line.disasm.startswith('EndOfAnimFrame <0FFh, 0>'):
        pass  # (rest of the snippet truncated in the original)
def analyzeFunction(self, func_ea, src_mode):
    """Analyze a given function, and create a canonical representation for it.

    Args:
        func_ea (int): effective address of the wanted function
        src_mode (bool): True iff analyzing a self-compiled source file, otherwise analyzing a binary function

    Return Value:
        FunctionContext object representing the analyzed function
    """
    func = sark.Function(func_ea)
    if src_mode:
        # Index is irrelevant for the source analysis
        context = sourceContext()(self.funcNameInner(func.name), 0)
    else:
        # The index will be adjusted later, manually
        context = binaryContext()(func_ea, self.funcNameInner(func.name), 0)
    func_start = func.start_ea
    instr_count = 0
    call_candidates = set()
    code_hash = md5()
    for line in func.lines:
        instr_count += 1
        # Numeric Constants
        data_refs = list(line.drefs_from)
        for oper in [x for x in line.insn.operands if x.type.is_imm]:
            if oper.imm not in data_refs:
                context.recordConst(oper.imm)
        # Data Refs (strings, fptrs)
        for ref in data_refs:
            # Check for a string (finds un-analyzed strings too)
            str_const = self.disas.stringAt(ref)
            if str_const is not None and len(str_const) >= MIN_STR_SIZE:
                context.recordString(str_const)
                continue
            # Check for an fptr
            called_func = self.disas.funcAt(ref)
            if called_func is not None:
                call_candidates.add(self.disas.funcStart(called_func))
            elif src_mode:
                call_candidates.add(ref)
                continue
        # Code Refs (calls and unknowns)
        for cref in line.crefs_from:
            called_func = self.disas.funcAt(cref)
            if called_func is None:
                continue
            called_func_start = self.disas.funcStart(called_func)
            if (cref == func_start and line.insn.is_call) or called_func_start != func_start:
                call_candidates.add(called_func_start)
        # in binary mode don't let the call_candidates expand too much
        if not src_mode:
            [context.recordCall(x) for x in call_candidates]
            call_candidates = set()
        # hash the instruction (only in source mode)
        else:
            # two cases:
            # 1. No linker fixups, hash the binary - easy case
            # 2. Linker fixups, hash the text (includes the symbol name that the linker will use too)
            has_fixups = False
            # data variables
            for dref in line.drefs_from:
                if sark.Line(dref).name in self.disas.exports():
                    has_fixups = True
                    break
            # external code functions
            if not has_fixups:
                for cref in line.crefs_from:
                    if sark.Line(cref).name in self.disas.exports():
                        has_fixups = True
                        break
            # case #2
            if has_fixups:
                code_hash.update(line.disasm.encode("utf-8"))
            # case #1
            else:
                code_hash.update(line.bytes)
    # check all the call candidates together
    if src_mode:
        for candidate in call_candidates:
            ref_func = None
            called_func = self.disas.funcAt(candidate)
            if called_func is not None:
                ref_func = self.disas.funcName(called_func)
                risky = False
            else:
                ref_func = self.disas.nameAt(candidate)
                risky = True
            # check if known or unknown
            if sark.Line(candidate).disasm.split(" ")[0].lower() in ("extrn", "extern", "import"):
                context.recordUnknown(ref_func, is_fptr=risky)
            elif not risky:
                context.recordCall(ref_func)
    # set the function's hash
    context.setHash(code_hash.hexdigest())
    context.setFrame(func.frame_size)
    context.setInstrCount(instr_count)
    # Now, record the code blocks
    flow = idaapi.FlowChart(func.func_t)
    for block in flow:
        try:
            context.recordBlock(len(list(sark.CodeBlock(block.start_ea).lines)))
        except Exception:
            # happens with code outside of a function
            continue
    context.blocks.sort(reverse=True)
    # Now add the flow analysis
    context.setCallOrder(self.disas.analyzeFunctionGraph(func_ea, src_mode))
    return context
def locateLocalConstants(self, scs, sds):
    """Locate and define all of the local strings / numeric constants that match our observed pattern.

    Args:
        scs (list): List of (sark) code segments.
        sds (list): List of (sark) data segments.
    """
    self._analyzer.logger.info("Locating local strings / constants in the code sections")
    for sc in scs:
        cur_ea = pad(sc.start_ea, self._local_alignment)
        while cur_ea < sc.end_ea:
            # Only interested in data constants
            if not self.isDataConstant(cur_ea):
                cur_ea += self._local_alignment
                continue
            # check for a string (refs already checked)
            if self._analyzer.str_identifier.isLocalAsciiString(cur_ea, check_refs=False):
                length = self._analyzer.str_identifier.defineAsciiString(cur_ea)
                padded_length = pad(length, self._local_alignment)
                if padded_length != length:
                    ida_bytes.del_items(cur_ea + length, 0, padded_length - length)
                    ida_bytes.create_data(cur_ea + length, 0, padded_length - length, 0)
                cur_ea += padded_length
                continue
            # This means it is a constant, now check if we have a padding
            if self._local_pad is None:
                ida_bytes.create_data(cur_ea, 0, self._local_alignment, 0)
                cur_ea += self._local_alignment
                continue
            # check the size of the constant using the byte padding
            for offset in range(self._local_alignment - 1, -1, -1):
                if idc.get_wide_byte(cur_ea + offset) != self._local_pad:
                    break
            # prepare the bytes
            ida_bytes.del_items(cur_ea, 0, self._local_alignment)
            # the data constant - try to make it pretty
            if offset + 1 == 2:
                ida_bytes.create_data(cur_ea, idc.FF_WORD, 2, idc.BADADDR)
            elif offset + 1 == 4:
                ida_bytes.create_data(cur_ea, idc.FF_DWORD, 4, idc.BADADDR)
            elif offset + 1 == 8:
                ida_bytes.create_data(cur_ea, idc.FF_QWORD, 8, idc.BADADDR)
            else:
                ida_bytes.create_data(cur_ea, 0, offset + 1, 0)
            # the padding
            ida_bytes.create_data(cur_ea + offset + 1, 0, self._local_alignment - offset + 1, 0)
            # Now check for a pointer (only supports code pointers for now)
            if offset + 1 == self._analyzer.addressSize():
                value = self._analyzer.parseAdderss(cur_ea)
                # only support pointers inside our local segment (more probable)
                if sc.start_ea <= value < sc.end_ea:
                    self._analyzer.markCodePtr(cur_ea, value, aggressive=False)
                # try a pointer to a declared string
                else:
                    for sd in sds:
                        if sd.start_ea <= value <= sd.end_ea:
                            line = sark.Line(value)
                            if line.is_string and line.start_ea == value:
                                self._analyzer.markDataPtr(cur_ea, value, aggressive=False)
                            break
            # now move onward
            cur_ea += self._local_alignment