def run(self):
    """Apply type-library prototypes to the view's imported functions.

    Every imported-function symbol of ``self.bv`` is looked up by name in
    each type library of the view's platform.  Each hit is applied as the
    user type of the function at the symbol's address, so if several
    libraries know the name the last one queried wins.  Analysis is
    refreshed (blocking) once all symbols have been processed.

    Logs a warning and returns early when the platform has no type
    libraries loaded.
    """
    # Read the property once instead of once per symbol.
    type_libraries = self.bv.platform.type_libraries
    if not type_libraries:
        log_warn(f"No type libraries loaded for: {self.bv.platform.name}")
        return

    for sym in self.bv.get_symbols_of_type(SymbolType.ImportedFunctionSymbol):
        # The function lookup does not depend on the type library, so do it
        # once per symbol and skip symbols that have no function at all.
        func = self.bv.get_function_at(sym.address)
        if func is None:
            continue

        for typelib in type_libraries:
            sym_type = typelib.get_named_object(sym.name)
            if sym_type is None:
                continue
            func.set_user_type(sym_type)
            log_debug("Updated sym %s at 0x%02X" % (sym.name, sym.address))

    self.bv.update_analysis_and_wait()
def get_bininfo(bv):
    """Return a Markdown report describing a binary view.

    The report contains the file name, start address, entry point,
    architecture name, and tables with the first ten functions and the
    first ten strings of the view.

    Args:
        bv: The binary view to describe.  When ``None`` the file named by
            ``sys.argv[1]`` is opened, or, if no argument was given, the
            user is asked for a file name; the process exits with status 1
            if the prompt yields ``None``.  In this case output is also
            redirected to the log and logging to stdout is enabled.

    Returns:
        The report as a single string.
    """
    if bv is None:
        if len(sys.argv) > 1:
            filename = sys.argv[1]
        else:
            filename = interaction.get_open_filename_input("Filename:")
            if filename is None:
                log.log_warn("No file specified")
                sys.exit(1)

        bv = BinaryViewType.get_view_of_file(filename)
        log.redirect_output_to_log()
        log.log_to_stdout(True)

    parts = [
        "## %s ##\n" % bv.file.filename,
        "- START: 0x%x\n\n" % bv.start,
        "- ENTRY: 0x%x\n\n" % bv.entry_point,
        "- ARCH: %s\n\n" % bv.arch.name,
        "### First 10 Functions ###\n",
        "| Start | Name   |\n",
        "|------:|:-------|\n",
    ]

    # Fetch each collection once; `range` replaces the Python-2-only
    # `xrange`, which is a NameError on Python 3.
    functions = bv.functions
    for i in range(min(10, len(functions))):
        func = functions[i]
        parts.append("| 0x%x | %s |\n" % (func.start, func.symbol.full_name))

    parts.append("### First 10 Strings ###\n")
    parts.append("| Start | Length | String |\n")
    parts.append("|------:|-------:|:-------|\n")

    strings = bv.strings
    for i in range(min(10, len(strings))):
        start = strings[i].start
        length = strings[i].length
        string = bv.read(start, length)
        parts.append("| 0x%x |%d | %s |\n" % (start, length, string))

    return "".join(parts)
def import_selected(self):
    """Import every selected type and object row into ``self.view``.

    Selected rows of the types table are defined as user types.  For each
    selected row of the objects table, the first symbol with that name whose
    type is an import-address or imported-data symbol is located and a user
    data variable typed as a pointer to the row's type is defined at its
    address; rows without such a symbol are reported with a warning and
    skipped.  Finally an analysis update is requested.
    """
    # Several cells of one row may be selected; collapse them to row numbers.
    type_rows = {index.row() for index in self.types_table.selectedIndexes()}
    for row in type_rows:
        type_name, type_obj = self.types_table.model().types[row]
        self.view.define_user_type(type_name, type_obj)

    object_rows = {index.row() for index in self.objects_table.selectedIndexes()}
    for row in object_rows:
        name, type_ = self.objects_table.model().types[row]

        symbol = None
        for candidate in self.view.get_symbols_by_name(str(name)):
            if candidate.type in (SymbolType.ImportAddressSymbol,
                                  SymbolType.ImportedDataSymbol):
                symbol = candidate
                break

        if symbol is None:
            log.log_warn(f"Could not find symbol `{name}` in the binary!")
            continue

        self.view.define_user_data_var(
            symbol.address, Type.pointer(self.view.arch, type_))

    self.view.update_analysis()
def find_ecall_table_heuristic(bv):
    """Scan read-only data for candidate ecall tables.

    The first of the ``.rdata`` / ``.rodata`` sections present in ``bv`` is
    read and searched for pointer-sized all-zero words located at a
    pointer-aligned offset of the section data.  For each such word the five
    little-endian pointer-sized values starting two words before it are
    decoded as ``(nr, e0, e1, e2, e3)``.  The location is reported when
    ``2 <= nr < 300``, ``e1`` and ``e3`` are zero, and ``e0`` and ``e2`` are
    non-zero.

    Args:
        bv: Binary view providing ``arch.address_size``, ``sections`` and
            ``read(start, length)``.

    Returns:
        List of addresses (section start + offset of ``nr``) of all
        candidates, in ascending order.  Empty if nothing matched, if
        neither section exists, or if reading failed (a warning is logged
        in the latter two cases).
    """
    ptr_s = bv.arch.address_size
    ptr = 'I' if ptr_s == 4 else 'Q'
    fmt = f"<{ptr * 5}"
    found = []
    try:
        section = next(
            (bv.sections[s] for s in ('.rdata', '.rodata') if s in bv.sections),
            None)
        if section is None:
            log_warn("no .rdata/.rodata section found")
            return found

        start = section.start
        data = bv.read(start, section.end - start)
        zero_word = b"\0" * ptr_s

        # Start far enough in that the two words before a hit always exist.
        i = 5 * ptr_s
        while i < len(data):
            # find() returns -1 at the end of the data; index() would raise
            # and make every normal scan look like a failure.
            nu = data.find(zero_word, i)
            if nu < 0:
                break
            # The candidate needs three words from `nu` on; every later hit
            # would be even closer to the end, so stop here.
            if nu + 3 * ptr_s > len(data):
                break
            i = nu + 1
            if nu % ptr_s == 0:
                nr, *ec = unpack(fmt, data[nu - 2 * ptr_s:nu + 3 * ptr_s])
                if (2 <= nr < 300 and ec[1] == ec[3] == 0
                        and ec[0] != 0 != ec[2]):
                    found.append(start + nu - 2 * ptr_s)
    except Exception as e:
        # Best effort: report and return whatever was found so far.
        log_warn(f"exception thrown: {e}")
    return found
def get_bininfo(bv):
    """Produce a Markdown summary of a binary view.

    The summary lists the base name of the file, the start address, the
    entry point, the architecture name, and up to ten functions and ten
    strings.

    If ``bv`` is ``None`` the view is opened from ``sys.argv[1]`` or, when
    no argument is present, from a file chosen interactively (the process
    exits with status 1 when the prompt yields ``None``), and logging to
    stdout is switched on.

    Returns the summary text.
    """
    if bv is None:
        if len(sys.argv) > 1:
            filename = sys.argv[1]
        else:
            filename = interaction.get_open_filename_input("Filename:")
            if filename is None:
                log.log_warn("No file specified")
                sys.exit(1)
        bv = BinaryViewType.get_view_of_file(filename)
        log.log_to_stdout(True)

    header = (
        "## %s ##\n" % os.path.basename(bv.file.filename)
        + "- START: 0x%x\n\n" % bv.start
        + "- ENTRY: 0x%x\n\n" % bv.entry_point
        + "- ARCH: %s\n\n" % bv.arch.name
    )

    function_rows = []
    for index in range(min(10, len(bv.functions))):
        entry = bv.functions[index]
        function_rows.append(
            "| 0x%x | %s |\n" % (entry.start, entry.symbol.full_name))

    string_rows = []
    for index in range(min(10, len(bv.strings))):
        ref = bv.strings[index]
        text = bv.read(ref.start, ref.length)
        string_rows.append("| 0x%x |%d | %s |\n" % (ref.start, ref.length, text))

    return "".join(
        [
            header,
            "### First 10 Functions ###\n",
            "| Start | Name   |\n",
            "|------:|:-------|\n",
        ]
        + function_rows
        + [
            "### First 10 Strings ###\n",
            "| Start | Length | String |\n",
            "|------:|-------:|:-------|\n",
        ]
        + string_rows
    )
def get_matching_method(bv):
    """Resolve the effective matching method for ``bv``.

    The configured ``depanalyzer.matching_method`` setting is read first.
    Address matching on a relocatable view and ordinal matching on a view
    without ordinal support are both rejected: a warning is logged, the
    setting is reset to ``Auto`` for this view, and resolution continues as
    ``Auto``.  ``Auto`` resolves to ``Ordinal`` when the view supports
    ordinals and to ``Name`` otherwise.

    Returns the resolved ``MatchingMethod`` (never ``Auto``).
    """
    setting_key = 'depanalyzer.matching_method'

    def reset_to_auto(reason):
        # Warn, persist the fallback for this view, and hand back Auto.
        log_warn(reason)
        settings.set_string(setting_key,
                            MatchingMethod.Auto.value,
                            view=bv,
                            scope=SettingsScope.SettingsContextScope)
        return MatchingMethod.Auto

    method = MatchingMethod(settings.get_string(setting_key))

    if method == MatchingMethod.Address and bv.relocatable:
        method = reset_to_auto(
            'Attempted address-based matching on relocatable file: resetting to auto'
        )

    if method == MatchingMethod.Ordinal and not supports_ordinals(bv):
        method = reset_to_auto(
            'Attempted ordinal-based matching on non-supported file type: resetting to auto'
        )

    if method != MatchingMethod.Auto:
        return method

    return MatchingMethod.Ordinal if supports_ordinals(bv) else MatchingMethod.Name
def node_token(node):
    """Return a text token identifying a graph node.

    * MLIL instructions are rendered with ``str()``.
    * SSA variables are rendered as ``"<name>#<version>"``.
    * Anything else is rendered with ``str()`` after logging a warning.

    ``isinstance`` is used so that subclasses of the two node types are
    recognised as well; an exact ``type(...) is`` check rejects them.
    """
    if isinstance(node, MediumLevelILInstruction):
        text = str(node)
        # On Python 2 str() yields bytes that need decoding; on Python 3 it
        # is already text and has no .decode() method.
        if isinstance(text, bytes):
            return text.decode('utf-8')
        return text

    if isinstance(node, SSAVariable):
        return "{}#{}".format(node.var.name, node.version)

    log.log_warn("No way to stringify node of type %s" % type(node))
    return str(node)
def resolve_imports_for_library(bv, lib):
    """Name and type the IAT entries that ``bv`` imports by ordinal from ``lib``.

    For every import of ``lib`` the export with the same ordinal is looked
    up in the already loaded view of the library.  The IAT data variable in
    ``bv`` gets an ``<name>@IAT`` import-address symbol (namespaced by the
    library name without extension) and a function-pointer type built from
    the type tokens of the exported function.

    Imports that cannot be resolved (no export with a name for the ordinal,
    no function at the export, or an unparsable type string) are logged and
    skipped instead of aborting the whole library.

    Args:
        bv: View whose imports are being resolved.
        lib: Library descriptor providing ``name`` and ``imports``.
    """
    source_bv = peutils.files[lib.name.lower()]
    exports = pe_parsing.get_exports(source_bv)

    # Index once instead of rescanning all exports for every import.  Later
    # duplicates override earlier ones, like the linear "last match wins" scan.
    exports_by_ordinal = {export.ord: export for export in exports}
    namespace = lib.name.split(".")[0]

    for import_ in lib.imports:
        # Find the name
        export = exports_by_ordinal.get(import_.ordinal)
        name = export.name if export is not None else None
        if not name:
            log_warn("Unable to find name for %r" % import_)
            # Without a name there is nothing to define; carrying on would
            # reuse the export of a previous import or crash.
            continue
        export_symbol = export.symbol

        # Redefine the IAT thunk symbol
        original_symbol = bv.get_symbol_at(import_.datavar_addr)

        # Delete any existing auto symbols
        if original_symbol:
            log_info("Renaming %s to %s:%s" %
                     (original_symbol.name, lib.name, name))
            bv.undefine_auto_symbol(original_symbol)
        else:
            log_info("Creating IAT symbol %s:%s @ %08x" %
                     (namespace, name, import_.datavar_addr))

        # Create the new symbol
        bv.define_auto_symbol(
            Symbol(
                SymbolType.ImportAddressSymbol,
                import_.datavar_addr,
                name + "@IAT",
                namespace=namespace,
            ))

        # Transplant type info
        export_func = source_bv.get_function_at(export_symbol.address)
        if export_func is None:
            log_warn("No function found for export %s, skipping type" % name)
            continue

        type_tokens = [token.text for token in export_func.type_tokens]
        try:
            i = type_tokens.index(export_symbol.name)
        except ValueError:
            log_error("Invalid type, skipping")
            continue
        # Turn the prototype into a function-pointer declaration.
        type_tokens[i] = "(*const func_name)"
        type_string = "".join(type_tokens)
        log_info("Setting type for %s to %r" % (name, type_string))

        try:
            type_, _ = bv.parse_type_string(type_string)
        except Exception:
            log_error("Invalid type, skipping")
            continue

        bv.define_data_var(import_.datavar_addr, type_)

        # FIXME: Apply params to ImportedFunctionSymbols -- check xref on
        # datavar and filter by associated symbols
        # This doesn't actually seem to help and apparently I didn't have to do
        # this before? Maybe I just didn't handle jump
def rename_functions(self):
    """Rename functions using the names stored in the ``.gopclntab`` table.

    The table is located through the ``.gopclntab`` section or, if that
    section is missing, by searching the view for a fixed byte pattern.
    Each table entry yields a function address and an offset to a record
    holding the name offset.  A function is created at the address if none
    exists, and names longer than two characters are sanitised, prefixed
    with ``GOFUNC_PREFIX`` and defined as user function symbols.  Shorter
    or empty names are reported with a warning and left alone.
    """
    renamed = 0
    log_info("renaming functions based on .gopclntab section")

    gopclntab = self.get_section_by_name(".gopclntab")
    if gopclntab is None:
        # Binary data must be searched with a bytes pattern; a str literal
        # only worked on Python 2.
        pattern = b"\xfb\xff\xff\xff\x00\x00"
        base_addr = self.bv.find_next_data(0, pattern)
        if base_addr is None:
            log_alert("Failed to find section '.gopclntab'")
            return
    else:
        base_addr = gopclntab.start

    size_addr = base_addr + 8
    size = self.get_pointer_at(size_addr)
    # Floor division keeps the reported count an integer on Python 3.
    log_info("found .gopclntab section at 0x{:x} with {} entries".format(
        base_addr, size // (self.ptr_size * 2)))

    start_addr = size_addr + self.ptr_size
    end_addr = base_addr + (size * self.ptr_size * 2)
    entry_size = 2 * self.ptr_size

    for addr in range(start_addr, end_addr, entry_size):
        log_debug("analyzing at 0x{:x}".format(addr))
        func_addr = self.get_pointer_at(addr)
        entry_offset = self.get_pointer_at(addr + self.ptr_size)
        log_debug("func_addr 0x{:x}, entry offset 0x{:x}".format(
            func_addr, entry_offset))

        # The name offset is a 4-byte value one pointer into the record.
        name_str_offset = self.get_pointer_at(
            base_addr + entry_offset + self.ptr_size, 4)
        name_addr = base_addr + name_str_offset
        name = self.read_cstring(name_addr)
        log_debug("found name '{}' for address 0x{:x}".format(
            name, func_addr))

        func = self.bv.get_function_at(func_addr)
        if not func:
            func = self.bv.create_user_function(func_addr)

        if name and len(name) > 2:
            name = GOFUNC_PREFIX + santize_gofunc_name(name)
            sym = bn.types.Symbol('FunctionSymbol', func_addr, name, name)
            self.bv.define_user_symbol(sym)
            renamed += 1
        else:
            log_warn(
                ("not using function name {!r} for function at 0x{:x}"
                 " in .gopclntab addr 0x{:x} name addr 0x{:x}").format(
                     name, func_addr, addr, name_addr))

    log_info("renamed {} go functions".format(renamed))
def decode(data, addr):
    """Decode one instruction from ``data`` located at ``addr``.

    Returns the decoded ``Instruction``.  Returns ``None`` when the encoding
    is unknown (a warning with the address and the hex bytes is logged) or
    when the buffer is too short to hold a complete instruction.
    """
    decoder = Decoder(data)
    try:
        instr = Instruction(decoder)
        instr.decode(decoder, addr)
    except KeyError:
        message = 'At address {:05x}: unknown encoding {}'.format(
            addr, data.hex())
        log.log_warn(message)
        return None
    except coding.BufferTooShort:
        # Not enough bytes for a full instruction; nothing to report.
        return None
    return instr
def get_bin_view(bv):
    """Return ``bv``, opening a view first when ``bv`` is ``None``.

    When no view is passed in, the file named by ``sys.argv[1]`` is opened;
    without a command-line argument the user is prompted for a file name and
    the process exits with status 1 if the prompt yields ``None``.  Logging
    to stdout is enabled after a view has been opened this way.
    """
    if bv is not None:
        return bv

    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        filename = interaction.get_open_filename_input("Filename:")
        if filename is None:
            log.log_warn("No file specified")
            sys.exit(1)

    opened_view = BinaryViewType.get_view_of_file(filename)
    log.log_to_stdout(True)
    return opened_view
def set_comment_at_xref(self, xref, comment):
    """Attach ``comment`` to ``xref`` if it is a call, jump, syscall or goto.

    The lifted IL operation at the reference address decides whether the
    reference is annotated.  If the lookup raises ``IndexError`` a warning
    is logged and nothing else happens.  Annotated locations are recorded in
    ``self.cov_comments`` as ``(function, address)`` pairs.
    """
    try:
        op = xref.function.get_lifted_il_at(xref.address).operation
    except IndexError:
        log.log_warn("ManticoreTrace: Could not lookup " + hex(xref.address)
                     + " address for function " + str(xref.function))
        return

    llil_ops = enums.LowLevelILOperation
    annotated_ops = (
        llil_ops.LLIL_CALL,
        llil_ops.LLIL_JUMP,
        llil_ops.LLIL_JUMP_TO,
        llil_ops.LLIL_SYSCALL,
        llil_ops.LLIL_GOTO,
    )
    if op not in annotated_ops:
        return

    self.cov_comments.add((xref.function, xref.address))
    xref.function.set_comment_at(xref.address, comment)
def r(kind, il, v=0):
    """Build the IL expression that reads an operand of the given kind.

    kind: operand kind string (e.g. '@R0', '#data', 'data addr', 'bit addr',
          'DPTR', a register name, or 'C')
    il:   IL function used to construct expressions
    v:    constant operand value; a ``(byte, bit)`` pair for bit addresses

    Never called with MOVX; handles IRAM/SFR addresses only, since the MOVX
    instruction is distinct from the others.  Never called on 16-bit
    immediates, as there is no way to distinguish them from 8-bit ones.
    """
    if kind.startswith('@'):
        pointer = il.reg(1, kind[1:])
        return il.load(1, il.add(6, pointer, il.const(6, mem.IRAM)))

    if kind == '#data':
        return il.const(1, v)

    if kind == 'code addr':
        return il.const_pointer(6, v)

    if kind == 'data addr':
        if v not in mem.regs:
            # TODO: overlay PSW as register? how to compute from flags?
            return il.load(1, il.const_pointer(6, v))
        return il.reg(1, mem.regs[v])

    if kind.endswith('bit addr'):  # cosmetic / prefix, optional
        byte, bit = v
        if byte == mem.PSW and bit in mem.flags:
            return il.flag(mem.flags[bit])
        if byte in mem.regs:
            reg_name = mem.regs[byte]
            if reg_name == 'A' and bit == 7:  # a47e
                # TODO TODO TODO how will setting this be tracked??
                return il.flag('s')
            source = il.reg(1, reg_name)
        else:
            source = il.load(1, il.const_pointer(6, byte))
        # Strangely, based on LLIL pretty-printing, test_bit takes a
        # *mask* not a bit index.
        return il.test_bit(1, source, il.const(0, 1 << bit))

    if kind == 'DPTR':
        return il.reg(2, kind)

    if kind.startswith('R') or kind in ['A', 'B'] or kind in mem.regs:
        return il.reg(1, kind)

    if kind == 'C':
        return il.flag('c')

    # @A+DPTR and @A+PC can be special-cased in their instructions
    log_warn('r ' + repr((kind, il, v)))
    assert not "reachable"
def w(kind, il, val, v=0):
    """Append the IL instruction that writes ``val`` to an operand.

    kind: type of write (e.g. '@R0', 'data addr', 'bit addr', a register
          name, 'DPTR', or 'C')
    il:   LowLevelILFunction
    val:  symbolic source
    v:    constant source; a ``(byte, bit)`` pair for bit addresses
    """

    def bit_mask(bit):
        # 1 << bit, built as an IL expression.
        return il.shift_left(1, il.const(1, 1), il.const(1, bit))

    if kind.startswith('@'):
        pointer = il.reg(1, kind[1:])
        target = il.add(6, pointer, il.const(6, mem.IRAM))
        return il.append(il.store(1, target, val))

    if kind == 'data addr':
        if v in mem.regs:
            return il.append(il.set_reg(1, mem.regs[v], val))  # aa5b good test aa68
        # TODO: overlay PSW as register? how to compute from flags?
        return il.append(il.store(1, il.const_pointer(6, v), val))

    if kind.endswith('bit addr'):  # cosmetic / prefix, optional
        byte, bit = v
        if byte == mem.PSW and bit in mem.flags:
            return il.append(il.set_flag(mem.flags[bit], val))

        if byte in mem.regs:  # a465
            current = il.reg(1, mem.regs[byte])
            mask = bit_mask(bit)
            # TODO: endianness, also need to clear bit not just set it...
            updated = il.or_expr(1, current, mask)
            return il.append(il.set_reg(1, mem.regs[byte], updated))

        # TODO sketchy bit-write endianness
        target = il.const_pointer(6, byte)  # should be properly mapped by ana
        mask = bit_mask(bit)
        # <- also only sets, never clears :|
        val = il.or_expr(1, il.load(1, target), mask)
        return il.append(il.store(1, target, val))

    if kind.startswith('R') or kind in ['A', 'B']:
        return il.append(il.set_reg(1, kind, val))

    if kind == 'DPTR':
        return il.append(il.set_reg(2, kind, val))

    if kind == 'C':
        return il.append(il.set_flag('c', val))

    log_warn('w ' + repr((kind, il, val, v)))
    assert not "reachable"
def safe_psx_set_type(view, f, calladdr, callnr):
    """Name and type function ``f`` after the PSX call it implements.

    The call is looked up with ``psx_get_type(calladdr, callnr)``.  On
    success ``f`` is renamed to ``PSX_<name>`` and its user type is set from
    the prototype string.  If the rename does not stick, numbered aliases
    ``PSX_<name>_copy_<n>`` are tried in turn.

    Lookup failures are logged as errors and unknown calls as warnings; in
    both cases ``f`` is left untouched.

    Args:
        view: View used to parse the prototype string.
        f: Function to rename and retype.
        calladdr: Address of the call table; falsy for a plain syscall.
        callnr: Call number.
    """
    try:
        call_type = psx_get_type(calladdr, callnr)
    except Exception:
        if calladdr:
            log_error("failed to lookup %s call %s" %
                      (format(calladdr, '#5x'), format(callnr, '#05x')))
        else:
            log_error("failed to lookup syscall %s" % callnr)
        # Nothing was looked up, so there is nothing to apply.
        return

    if not call_type:
        if calladdr:
            log_warn("Unknown %s call: %s" %
                     (format(calladdr, '#5x'), format(callnr, '#05x')))
        else:
            log_warn("Unknown syscall %s" % callnr)
        return

    base_name = "PSX_" + call_type[0]
    new_name = base_name
    f.name = new_name

    # TODO: Avoid touching user defined functions
    alias_index = 0
    while f.name != new_name:
        log_warn(
            "Unable to change name to %r, duplicate? Trying numbered alias"
            % new_name)
        new_name = base_name + "_copy_" + str(alias_index)
        f.name = new_name
        # Advance to a fresh alias; retrying the same one would loop forever.
        alias_index += 1

    f.set_user_type(view.parse_type_string(call_type[1])[0])
def main(args):
    """Build a type library from a C source/header file and write it to disk.

    Args:
        args: Parsed command-line arguments providing ``input_file``,
            ``output``, ``platform``, ``name`` and the optional
            comma-separated ``definitions`` and ``alt_names`` plus ``guid``.

    Returns:
        0 on success; 1 if the input file or the output directory does not
        exist or the platform name is unknown.
    """
    log_to_stdout(LogLevel.InfoLog)

    if not os.path.exists(args.input_file):
        log_warn(f"input file: {args.input_file} does not exist")
        return 1

    dir_path = os.path.dirname(os.path.realpath(args.output))
    if not os.path.exists(dir_path):
        log_warn(f"Output path directory {dir_path} does not exist")
        return 1

    try:
        platform: Platform = Platform[args.platform]
    except KeyError:
        log_warn(f"'{args.platform}' is not supported binja platform")
        return 1

    with open(args.input_file) as fd:
        type_data = fd.read()

    if args.definitions:
        # Each requested definition becomes "#define NAME 1" ahead of the source.
        defines = "".join(f"#define {definition} 1\n"
                          for definition in args.definitions.split(","))
        type_data = defines + type_data

    types_path = [os.path.dirname(os.path.realpath(args.input_file))]
    type_res = platform.parse_types_from_source(type_data,
                                                filename=args.input_file,
                                                include_dirs=types_path)

    cur_typelib: TypeLibrary = TypeLibrary.new(
        Architecture[platform.arch.name], args.name)

    for name, type_obj in type_res.functions.items():
        cur_typelib.add_named_object(name, type_obj)

    for name, type_obj in type_res.types.items():
        cur_typelib.add_named_type(name, type_obj)

    cur_typelib.add_platform(platform)

    if args.alt_names:
        for name in args.alt_names.split(","):
            cur_typelib.add_alternate_name(name)

    if args.guid:
        cur_typelib.guid = args.guid

    cur_typelib.finalize()
    cur_typelib.write_to_file(args.output)
    # Report success only once the file has actually been written.
    log_info(f"Wrote type library to {args.output}")
    return 0
def __setitem__(self, k, v):
    """Store ``v`` under ``k``, warning first if ``k`` is blacklisted.

    The assignment is always performed; when blacklisting is enabled and
    the key is on the blacklist a warning is logged beforehand.
    """
    is_blacklisted = self.blacklist_enabled and k in self.__blacklist
    if is_blacklisted:
        message = (
            'Setting variable "{}" will have no affect as it is automatically controlled by the ScriptingProvider.'
            .format(k))
        log.log_warn(message)
    super(BlacklistedDict, self).__setitem__(k, v)
def _printf_param_types(view, fmt_value):
    """Return the parameter types implied by the specifiers in ``fmt_value``.

    ``%d`` maps to a signed 4-byte int, ``%s`` to a char pointer and ``%p``
    to a void pointer.  Any other specifier is logged and treated as a void
    pointer.  Literal ``%%`` sequences consume no argument.
    """
    param_types = []
    # Drop escaped percent signs first so they are not taken for specifiers.
    specifiers = fmt_value.replace('%%', '').split('%')
    for specifier in specifiers[1:]:
        if not specifier:
            continue
        if specifier.startswith('d'):
            param_types.append(Type.int(4, sign=True))
        elif specifier.startswith('s'):
            param_types.append(Type.pointer(view.arch, Type.char()))
        elif specifier.startswith('p'):
            param_types.append(Type.pointer(view.arch, Type.void()))
        else:
            log.log_warn(
                f'Unknown format specifier: {specifier}; skipping')
            param_types.append(Type.pointer(view.arch, Type.void()))
    return param_types


def fix_printfs(view: BinaryView):
    """Adjust the call type of every ``printf`` call to match its format string.

    All code references to ``_printf`` (or ``printf`` if the former does not
    exist) are visited.  When the first argument of the call is a constant
    pointing at an ASCII string, the call site gets a call type adjustment
    with one parameter per format specifier.  Calls whose format argument is
    a variable are reported as potential format-string bugs.  Call sites
    that cannot be analysed (no IL, no parameters, no string) are skipped.
    """
    printf = view.get_symbols_by_name('_printf')
    if not printf:
        printf = view.get_symbols_by_name('printf')
    if not printf:
        return

    for sym in printf:
        function = view.get_function_at(sym.address)
        if not function:
            continue

        for xref in view.get_code_refs(function.start):
            caller: Function = xref.function

            # The reference may sit at an address without IL.
            call_llil = caller.get_low_level_il_at(xref.address)
            if call_llil is None:
                continue
            call_mlil = call_llil.mlil
            if call_mlil is None:
                continue

            # References that are not calls, or calls without recovered
            # arguments, have no format operand to inspect.
            params = getattr(call_mlil, 'params', None)
            if not params:
                continue
            fmt_operand = params[0]

            if fmt_operand.operation == MediumLevelILOperation.MLIL_VAR:
                log.log_warn(
                    f"Potential format string bug: {fmt_operand.address:x}")
                continue
            elif fmt_operand.operation in (
                    MediumLevelILOperation.MLIL_CONST_PTR,
                    MediumLevelILOperation.MLIL_CONST):
                fmt = view.get_ascii_string_at(fmt_operand.constant, 2)
                if fmt is None:
                    continue
                fmt_value = fmt.value
            else:
                continue

            params = [
                FunctionParameter(Type.pointer(view.arch, Type.char()), 'fmt')
            ]
            for param_idx, param in enumerate(
                    _printf_param_types(view, fmt_value), start=1):
                params.append(FunctionParameter(param, f'arg{param_idx}'))

            caller.set_call_type_adjustment(
                xref.address, Type.function(Type.int(4), params))
def init(self):
    """Parse the PS-X EXE header and set up the memory map of the view.

    Reads the first 0x800 bytes of the parent view, extracts the initial
    PC, the text load address and size, and the initial SP, and then
    creates segments and sections for scratchpad, IO ports, expansion
    regions, BIOS ROM and the 2M of RAM, with the executable image mapped
    at its load address.  Finally the ``_start``/``_stack`` symbols, the
    entry point, the symbols from ``psx_memmap_constants`` and an
    analysis-completion hook running the BIOS call finder are registered.

    Returns:
        True on success, False if anything raised (the traceback is logged).
    """
    try:
        hdr = self.parent_view.read(0, 0x800)
        # Slice ends are exclusive: hdr[0x10:0x14] is the four bytes at
        # 0x10..0x13.
        self.init_pc = struct.unpack("<L", hdr[0x10:0x14])[0]
        self.text_start = struct.unpack("<L", hdr[0x18:0x1c])[0]
        self.text_size = struct.unpack("<L", hdr[0x1c:0x20])[0]
        self.init_sp = struct.unpack("<L", hdr[0x30:0x34])[0]
        self.info = hdr[0x4c:self.HDR_SIZE]

        log_debug("/info size: %s" % format(len(self.info), '#010x'))
        log_info("PC:   %s" % format(self.init_pc, '#010x'))
        log_info("TEXT: %s" % format(self.text_start, '#010x'))
        log_info("SIZE: %s" % format(self.text_size, '#010x'))
        log_info("SP:   %s" % format(self.init_sp, '#010x'))
        log_info("info: %s" % self.info)

        # PSX came with 2M, but the BIOS supports 8 for dev machines.
        # The size is supposed to be a multiple of 2048, but that is not
        # required for the method used to sideload homebrew.
        # (FIXME: Is it enforced by the BIOS? Can't remember...)
        # FIXME: this is just a sanity check. Make it check if
        # text_start + text_size would run outside the address space.
        if self.text_size > 0x800000:
            # The log functions take one preformatted string, not
            # printf-style arguments.
            log_warn("size > 8M: %d" % self.text_size)
        if self.text_size % 2048 != 0:
            log_warn("size not divisable by 2k")

        text = self.parent_view.read(self.HDR_SIZE, self.text_size)
        log_info("Actual size of aquired TEXT: %s" %
                 format(len(text), '#010x'))
        if len(text) != self.text_size:
            log_error(
                "Size of aquired data is not same as header-prescribed TEXT size. Truncated file?"
            )

        # add_auto_segment(start, length, data_offset, data_length, flags)
        rwx = (SegmentFlag.SegmentReadable | SegmentFlag.SegmentWritable
               | SegmentFlag.SegmentExecutable)
        r_x = (SegmentFlag.SegmentReadable | SegmentFlag.SegmentExecutable)
        r_xc = (SegmentFlag.SegmentReadable | SegmentFlag.SegmentExecutable
                | SegmentFlag.SegmentContainsCode)

        # Scratchpad RAM 1k
        self.add_auto_segment(0x9F800000, 1024, 0, 0, rwx)
        self.add_auto_section("Scratchpad", 0x9F800000, 1024)

        # FIXME: I seem to remember most IO access as in the KSEG1 region.
        # This won't cover that.
        # IO Ports 8k
        self.add_auto_segment(0x9F801000, 8 * 1024, 0, 0, rwx)
        self.add_auto_section("IO Ports", 0x9F801000, 8 * 1024)

        # Expansion 2 (IO Ports) 8k
        self.add_auto_segment(0x9F802000, 8 * 1024, 0, 0, rwx)
        self.add_auto_section("Expansion region 2 (IO Ports)", 0x9F802000,
                              8 * 1024)

        # Expansion 3 2M
        self.add_auto_segment(0x9FA00000, 0x200000, 0, 0, rwx)
        self.add_auto_section("Expansion region 3", 0x9FA00000, 0x200000)

        # BIOS ROM 512k
        self.add_auto_segment(0x9FC00000, 512 * 1024, 0, 0, r_x)
        self.add_auto_section("BIOS", 0x9FC00000, 512 * 1024)

        # RAM (cached address space) 2M
        # Divided into pre-EXE and post-EXE space since it's the only way
        # found so far to not have the exe zeroed out.
        # FIXME: The area definitions overlap by one byte: one byte goes
        # missing in the viewer otherwise. Does BN use half-open ranges?
        ramsize = 0x200000
        prestart = 0x80000000
        presize = (self.text_start - 0) - 0x80000000
        if presize > 0:
            log_info("pre-RAM: %s - %s, size: %s" %
                     (format(prestart, '#010x'),
                      format(prestart + presize, '#010x'),
                      format(presize, '#010x')))
            self.add_auto_segment(prestart, presize, 0, 0, rwx)
            self.add_auto_section("RAM (pre EXE)", 0x80000000, presize)

        # Area for the actual executable. Will overlap with RAM if it's a
        # correct PSX-EXE.
        self.add_auto_segment(self.text_start, self.text_size,
                              self.HDR_SIZE, self.text_size, r_xc)
        self.add_auto_section("PS-X EXE", self.text_start, self.text_size)
        # semantics = SectionSemantics.ReadOnlyCodeSectionSemantics)

        # RAM (cached address space) 2M
        poststart = self.text_start + self.text_size
        postsize = (prestart + ramsize) - (self.text_start + self.text_size)
        if postsize > 0:
            log_info("post-RAM: %s - %s, size: %s" %
                     (format(poststart, '#010x'),
                      format(poststart + postsize, '#010x'),
                      format(postsize, '#010x')))
            self.add_auto_segment(poststart, postsize, 0, 0, rwx)
            self.add_auto_section("RAM (post EXE)", poststart, postsize)

        self.define_auto_symbol(
            Symbol(SymbolType.FunctionSymbol, self.init_pc, "_start"))
        self.define_auto_symbol(
            Symbol(SymbolType.FunctionSymbol, self.init_sp,
                   "_stack"))  # default: 0x801ffff0
        self.add_entry_point(self.init_pc)

        # The iomapped control of devices in a PSX is so high up that every
        # time those values are used you can be pretty sure we are talking
        # about the control registers and not some random loop counter.
        #
        # FIXME: With the exception of a0, b0 and c0-calls, those should be
        # lifted in some other manner but are useful enough that they are
        # hardcoded right now.
        # .items() exists on both Python 2 and 3; .iteritems() only on 2.
        for addr, symbol in psx_memmap_constants.items():
            self.define_auto_symbol(
                Symbol(SymbolType.DataSymbol, addr, symbol))

        # Uncomment this and remove the hook if you want to run the BIOS
        # call finder manually.
        #PluginCommand.register('Find PSX BIOS calls',
        #                       'Find PSX BIOS calls and rename them.',
        #                       find_bios_calls_run)

        # Run PSX BIOS function finder after analysis
        self.add_analysis_completion_event(
            lambda _: find_bios_calls_run(self))
        return True
    except Exception:
        log_error(traceback.format_exc())
        return False
def analyze_cxx_abi(view, start=None, length=None, task=None):
    """Demangle C++ symbols of ``view`` and annotate ABI data structures.

    Every symbol returned by ``view.get_symbols(start, length)`` whose raw
    name starts with ``_Z`` or ``__Z`` is demangled and redefined with its
    demangled short and full name.  Depending on what the name denotes:

    * typeinfo names are defined as char arrays,
    * typeinfo objects are defined as type_info structures,
    * vtables are measured heuristically, defined as vtable structures, and
      the functions they point to are added to the view,
    * functions get a type derived from the demangled signature.

    Args:
        view: Binary view to analyse.
        start: Optional start address passed to ``view.get_symbols``.
        length: Optional length passed to ``view.get_symbols``.
        task: Optional progress object with ``set_total`` and ``advance``;
            analysis stops early when ``advance()`` returns a falsy value.
    """
    platform = view.platform
    arch = platform.arch

    void_p_ty = Type.pointer(arch, Type.void())
    char_p_ty = Type.pointer(arch, Type.int(1))
    signed_int_ty = Type.int(arch.default_int_size, True)

    base_type_info_ty = Type.named_type(
        NamedTypeReference(name='std::type_info'))
    base_type_info_ptr_ty = Type.pointer(arch, base_type_info_ty)

    def char_array_ty(length):
        # Array of `length` one-byte integers.
        return Type.array(Type.int(1), length)

    def type_info_ty(kind=None):
        type_info_struct = Structure()
        type_info_struct.append(void_p_ty, 'vtable')
        type_info_struct.append(char_p_ty, 'name')
        if kind == 'si_class':
            type_info_struct.append(base_type_info_ptr_ty, 'base_type')
        return Type.structure_type(type_info_struct)

    def vtable_ty(vfunc_count):
        vtable_struct = Structure()
        vtable_struct.append(signed_int_ty, 'top_offset')
        vtable_struct.append(base_type_info_ptr_ty, 'typeinfo')
        vtable_struct.append(Type.array(void_p_ty, vfunc_count), 'functions')
        return Type.structure_type(vtable_struct)

    if platform.name.startswith("windows-"):
        long_size = arch.default_int_size
    else:
        long_size = arch.address_size

    if arch.name.startswith('x86'):
        char_signed = True
    else:
        char_signed = False  # not always true

    short_size = 2  # not always true
    long_long_size = 8  # not always true

    ty_for_cxx_builtin = {
        'void': Type.void(),
        'wchar_t': Type.int(2, sign=char_signed, altname='wchar_t'),
        'bool': Type.bool(),
        'char': Type.int(1, sign=char_signed),
        'signed char': Type.int(1, sign=True),
        'unsigned char': Type.int(1, sign=False),
        'short': Type.int(short_size, sign=True),
        'unsigned short': Type.int(short_size, sign=False),
        'int': Type.int(arch.default_int_size, sign=True),
        'unsigned int': Type.int(arch.default_int_size, sign=False),
        'long': Type.int(long_size, sign=True),
        'unsigned long': Type.int(long_size, sign=False),
        'long long': Type.int(long_long_size, sign=True),
        'unsigned long long': Type.int(long_long_size, sign=False),
        '__int128': Type.int(16, sign=True),
        'unsigned __int128': Type.int(16, sign=False),
        'float': Type.float(4),
        'double': Type.float(8),
        '__float80': Type.float(10),
        '__float128': Type.float(16),
        'char32_t': Type.int(4, sign=char_signed, altname='char32_t'),
        'char16_t': Type.int(2, sign=char_signed, altname='char16_t'),
    }

    def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None):
        """Convert a demangler AST node to a type, or None if not possible.

        With ``arg_count_hint`` set (toplevel call on a function node) the
        result is a ``(this_arg, type)`` tuple instead of a bare type.
        """
        if node.kind == 'builtin':
            if node.value in ty_for_cxx_builtin:
                return ty_for_cxx_builtin[node.value]
            else:
                return None
        elif node.kind in ['name', 'qual_name']:
            named_ty_ref = NamedTypeReference(name=str(node))
            return Type.named_type(named_ty_ref)
        elif node.kind in ['pointer', 'lvalue', 'rvalue']:
            pointee_ty = ty_from_demangler_node(node.value)
            if pointee_ty is None:
                return None
            is_const = ('const' in cv_qual)
            is_volatile = ('volatile' in cv_qual)
            if node.kind == 'pointer':
                return Type.pointer(arch, pointee_ty, is_const, is_volatile)
            elif node.kind == 'lvalue':
                return Type.pointer(
                    arch,
                    pointee_ty,
                    is_const,
                    is_volatile,
                    ref_type=ReferenceType.ReferenceReferenceType)
            elif node.kind == 'rvalue':
                return Type.pointer(
                    arch,
                    pointee_ty,
                    is_const,
                    is_volatile,
                    ref_type=ReferenceType.RValueReferenceType)
        elif node.kind == 'cv_qual':
            return ty_from_demangler_node(node.value, cv_qual=node.qual)
        elif node.kind == 'func':
            is_ctor_dtor = False
            if node.name and node.name.kind == 'qual_name':
                qual_name = node.name.value
                if qual_name[-1].kind in ['ctor', 'dtor']:
                    is_ctor_dtor = True

            if is_ctor_dtor:
                ret_ty = Type.void()
            elif node.ret_ty is not None:
                ret_ty = ty_from_demangler_node(node.ret_ty)
                if ret_ty is None:
                    return None
            else:
                ret_ty = Type.int(arch.default_int_size).with_confidence(0)

            arg_nodes = list(node.arg_tys)
            arg_tys = []

            var_arg = False
            # An empty list has neither a trailing '...' nor a leading 'void'.
            if arg_nodes:
                if arg_nodes[-1].kind == 'builtin' and arg_nodes[-1].value == '...':
                    arg_nodes.pop()
                    var_arg = True
                elif arg_nodes[0].kind == 'builtin' and arg_nodes[0].value == 'void':
                    arg_nodes = arg_nodes[1:]

            this_arg = False
            if node.name and node.name.kind == 'qual_name':
                qual_name = node.name.value
                if is_ctor_dtor or (arg_count_hint is not None
                                    and len(arg_nodes) == arg_count_hint - 1):
                    this_arg = True
                    this_node = Node('qual_name', qual_name[:-1])
                    this_ty = ty_from_demangler_node(this_node)
                    if this_ty is None:
                        return None
                    arg_tys.append(Type.pointer(arch, this_ty))

            for arg_node in arg_nodes:
                arg_ty = ty_from_demangler_node(arg_node)
                if arg_ty is None:
                    return None
                arg_tys.append(arg_ty)

            ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg)
            if arg_count_hint is not None:
                # toplevel invocation, so return whether we inferred a this argument
                return this_arg, ty
            else:
                return ty
        else:
            log.log_warn("Cannot convert demangled AST {} to a type".format(
                repr(node)))
            return None

    reader = BinaryReader(view)

    def read(size):
        if size == 4:
            return reader.read32()
        elif size == 8:
            return reader.read64()
        else:
            # Raise instead of assert so the check survives `python -O`.
            raise ValueError("unsupported read size: {}".format(size))

    symbols = view.get_symbols(start, length)
    if task:
        task.set_total(len(symbols))

    mangled_re = re.compile('_?_Z')

    demangler_failures = 0
    for symbol in symbols:
        if task and not task.advance():
            break

        if not mangled_re.match(symbol.raw_name):
            continue

        is_data = (symbol.type == SymbolType.DataSymbol)
        is_code = (symbol.type in [
            SymbolType.FunctionSymbol, SymbolType.ImportedFunctionSymbol
        ])

        raw_name, suffix = symbol.raw_name, ''
        if '@' in raw_name:
            match = re.match(r'^(.+?)(@.+)$', raw_name)
            # A trailing '@' has no suffix to split off.
            if match:
                raw_name, suffix = match.group(1), match.group(2)

        # Reset per symbol: if the demangler raises, no stale AST from the
        # previous symbol must be used for this one.
        name_ast = None
        try:
            name_ast = parse_mangled(raw_name)
            if name_ast is None:
                log.log_warn(
                    "Demangler failed to recognize {}".format(raw_name))
                demangler_failures += 1
        except NotImplementedError as e:
            log.log_warn("Demangler feature missing on {}: {}".format(
                raw_name, str(e)))
            demangler_failures += 1

        if name_ast:
            if name_ast.kind == 'func':
                short_name = str(name_ast.name)
            else:
                short_name = str(name_ast)
            symbol = Symbol(symbol.type,
                            symbol.address,
                            short_name=short_name + suffix,
                            full_name=str(name_ast) + suffix,
                            raw_name=symbol.raw_name)
        else:
            symbol = Symbol(symbol.type,
                            symbol.address,
                            short_name=symbol.raw_name,
                            full_name=None,
                            raw_name=symbol.raw_name)
        view.define_auto_symbol(symbol)

        if name_ast is None:
            continue

        elif is_data and name_ast.kind == 'typeinfo_name':
            strings = view.get_strings(symbol.address, 1)
            if not strings:
                continue

            view.define_data_var(symbol.address,
                                 char_array_ty(strings[0].length))

        elif is_data and name_ast.kind == 'typeinfo':
            reader.offset = symbol.address + arch.address_size * 2

            kind = None

            # heuristic: is this an abi::__si_class_type_info?
            base_or_flags = read(arch.default_int_size)
            base_symbol = view.get_symbol_at(base_or_flags)
            if base_symbol and base_symbol.raw_name.startswith('_ZTI'):
                kind = 'si_class'

            view.define_data_var(symbol.address, type_info_ty(kind))

        elif is_data and name_ast.kind == 'vtable':
            vtable_addr = symbol.address

            reader.offset = vtable_addr + arch.address_size * 2
            while True:
                vfunc_count = 0
                check_next = True
                while True:
                    vfunc_ptr_symbol = view.get_symbol_at(reader.offset)
                    if vfunc_ptr_symbol and vfunc_ptr_symbol.raw_name.startswith(
                            '_Z'):
                        # any C++ symbol definitely terminates the vtable
                        check_next = False
                        break

                    # heuristic: existing function
                    vfunc_addr = read(arch.address_size)
                    if view.get_function_at(vfunc_addr):
                        vfunc_count += 1
                        continue

                    # explicitly reject null pointers; in position-independent code
                    # address zero can belong to the executable segment
                    if vfunc_addr == 0:
                        check_next = False
                        break

                    # heuristic: pointer to executable memory
                    vfunc_segment = view.get_segment_at(vfunc_addr)
                    if vfunc_segment and vfunc_segment.executable:
                        view.add_function(vfunc_addr)
                        vfunc_count += 1

                        log.log_info(
                            'Discovered function at {:#x} via {}'.format(
                                vfunc_addr, symbol.full_name
                                or symbol.short_name))
                        continue

                    # we've fallen off the end of the vtable
                    break

                view.define_data_var(vtable_addr, vtable_ty(vfunc_count))

                if check_next:
                    # heuristic: can another vtable follow this one? let's see if it has typeinfo,
                    # since that should be always true for when we have a virtual base
                    typeinfo_ptr = read(arch.address_size)
                    typeinfo_ptr_symbol = view.get_symbol_at(typeinfo_ptr)
                    if typeinfo_ptr_symbol and typeinfo_ptr_symbol.raw_name.startswith(
                            '_ZTI'):
                        vtable_addr = reader.offset - 2 * arch.address_size

                        # document it with a symbol
                        secondary_symbol_name = '{}_secondary_{:x}'.format(
                            symbol.short_name, vtable_addr - symbol.address)
                        secondary_symbol = Symbol(
                            SymbolType.DataSymbol,
                            vtable_addr,
                            short_name=secondary_symbol_name)
                        view.define_auto_symbol(secondary_symbol)
                        continue

                break

        elif is_code and name_ast.kind == 'func':
            func = view.get_function_at(symbol.address)
            if func is None:
                # A function symbol without a function at its address has
                # no type to update.
                continue
            demangled = ty_from_demangler_node(
                name_ast, arg_count_hint=len(func.function_type.parameters))
            if demangled is not None:
                _, ty = demangled
                func.apply_auto_discovered_type(ty)

    view.update_analysis()

    if demangler_failures:
        log.log_warn('{} demangler failures'.format(demangler_failures))
def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None):
    """Convert a demangler AST node into a type.

    Args:
        node: Demangler AST node; its ``kind`` selects the conversion.
        cv_qual: Qualifiers (``'const'`` / ``'volatile'``) that apply to a
            pointer or reference node.
        arg_count_hint: Number of parameters the analysed function currently
            has.  Only passed on the toplevel call; it is used to infer an
            implicit ``this`` argument for qualified function names.

    Returns:
        The converted type, or ``None`` when the node (or one of its
        children) cannot be converted.  For a function node with
        ``arg_count_hint`` given, a ``(this_arg, type)`` tuple is returned,
        where ``this_arg`` tells whether a ``this`` parameter was inferred.
    """
    if node.kind == 'builtin':
        if node.value in ty_for_cxx_builtin:
            return ty_for_cxx_builtin[node.value]
        else:
            return None
    elif node.kind in ['name', 'qual_name']:
        named_ty_ref = NamedTypeReference(name=str(node))
        return Type.named_type(named_ty_ref)
    elif node.kind in ['pointer', 'lvalue', 'rvalue']:
        pointee_ty = ty_from_demangler_node(node.value)
        if pointee_ty is None:
            return None
        is_const = ('const' in cv_qual)
        is_volatile = ('volatile' in cv_qual)
        if node.kind == 'pointer':
            return Type.pointer(arch, pointee_ty, is_const, is_volatile)
        elif node.kind == 'lvalue':
            return Type.pointer(
                arch,
                pointee_ty,
                is_const,
                is_volatile,
                ref_type=ReferenceType.ReferenceReferenceType)
        elif node.kind == 'rvalue':
            return Type.pointer(arch,
                                pointee_ty,
                                is_const,
                                is_volatile,
                                ref_type=ReferenceType.RValueReferenceType)
    elif node.kind == 'cv_qual':
        return ty_from_demangler_node(node.value, cv_qual=node.qual)
    elif node.kind == 'func':
        is_ctor_dtor = False
        if node.name and node.name.kind == 'qual_name':
            qual_name = node.name.value
            if qual_name[-1].kind in ['ctor', 'dtor']:
                is_ctor_dtor = True

        if is_ctor_dtor:
            ret_ty = Type.void()
        elif node.ret_ty is not None:
            ret_ty = ty_from_demangler_node(node.ret_ty)
            if ret_ty is None:
                return None
        else:
            ret_ty = Type.int(arch.default_int_size).with_confidence(0)

        arg_nodes = list(node.arg_tys)
        arg_tys = []

        var_arg = False
        # An empty list has neither a trailing '...' nor a leading 'void';
        # indexing it would raise IndexError.
        if arg_nodes:
            if arg_nodes[-1].kind == 'builtin' and arg_nodes[-1].value == '...':
                arg_nodes.pop()
                var_arg = True
            elif arg_nodes[0].kind == 'builtin' and arg_nodes[0].value == 'void':
                arg_nodes = arg_nodes[1:]

        this_arg = False
        if node.name and node.name.kind == 'qual_name':
            qual_name = node.name.value
            if is_ctor_dtor or (arg_count_hint is not None
                                and len(arg_nodes) == arg_count_hint - 1):
                this_arg = True
                this_node = Node('qual_name', qual_name[:-1])
                this_ty = ty_from_demangler_node(this_node)
                if this_ty is None:
                    return None
                arg_tys.append(Type.pointer(arch, this_ty))

        for arg_node in arg_nodes:
            arg_ty = ty_from_demangler_node(arg_node)
            if arg_ty is None:
                return None
            arg_tys.append(arg_ty)

        ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg)
        if arg_count_hint is not None:
            # toplevel invocation, so return whether we inferred a this argument
            return this_arg, ty
        else:
            return ty
    else:
        log.log_warn("Cannot convert demangled AST {} to a type".format(
            repr(node)))
        return None
def annotate_operations_ending_at_mlil_instruction(
    bv: BinaryView, instruction: MediumLevelILInstruction, function: Function
):
    """
    Annotate divisions and modulos that end at the specified MLIL instruction

    Only SSA variable assignments are examined.  The instruction is executed
    symbolically while backtracking through its inputs; the resulting
    expression is then probed with a solver to find a matching modulus or
    divisor, which is written as a comment at the instruction's address.

    :param bv: Current binaryview
    :param instruction: Instruction to examine
    :param function: Current function
    :return: Always None
    :raises Exception: Any unexpected error of the symbolic execution is
        logged as a warning and re-raised.
    """
    ssa_instruction = instruction.ssa_form

    # TODO: There is probably an easy way to know more instructions that can be skipped.
    if ssa_instruction.operation != MediumLevelILOperation.MLIL_SET_VAR_SSA:
        log.log_debug("Deoptimizer: Skipping Instruction")
        return None

    # 15 found experimentally. There may be longer modulo optimizations.
    backtracking_state = BacktrackingState(bv, function, depth=15)
    start = MLILInstructionExecutor(bv, ssa_instruction)

    try:
        start.execute(backtracking_state)
    except NotImplementedError as e:
        # The exception may have been raised without arguments.
        unsupported = e.args[0] if e.args else e
        log.log_debug(
            "Unsupported Instruction: {}. If this instruction is necessary to deoptimize your code, please report this to the github: https://github.com/jmprdi/binja-division-deoptimization".format(
                unsupported
            )
        )
        return None
    except Exception as e:
        log.log_warn(
            "Deoptimizer Error: {} Please report this to the github: https://github.com/jmprdi/binja-division-deoptimization".format(
                repr(e)
            )
        )
        # Bare raise keeps the original traceback intact.
        raise

    if len(backtracking_state.potential_inputs) == 0:
        log.log_debug("Deoptimizer: No potential inputs")
        return None

    input_bv = backtracking_state.potential_inputs[-1]
    output_bv = backtracking_state.variables[ssa_instruction.dest]

    def do_operation(dividend):
        """Return the output for ``dividend`` as an int, or None if unsolved."""
        s = Solver()
        s.set("timeout", 10)
        s.add(input_bv == dividend)
        if s.check() != sat:
            return None
        solved = s.model().eval(output_bv)
        try:
            return solved.as_long()
        except AttributeError:
            # The evaluated expression is not a concrete number.
            return None

    modulo = modulo_binary_search(do_operation, 2 ** input_bv.size())
    if modulo is not None:
        bv.set_comment_at(ssa_instruction.address, "modulo by {}".format(modulo))
        return None

    divisor = integer_division_binary_search(do_operation, 2 ** input_bv.size())
    if divisor is not None:
        bv.set_comment_at(ssa_instruction.address, "divide by {}".format(divisor))
    return None