def analyze_cxx_abi(view, start=None, length=None, task=None): platform = view.platform arch = platform.arch void_p_ty = Type.pointer(arch, Type.void()) char_p_ty = Type.pointer(arch, Type.int(1)) unsigned_int_ty = Type.int(arch.default_int_size, False) signed_int_ty = Type.int(arch.default_int_size, True) base_type_info_ty = Type.named_type( NamedTypeReference(name='std::type_info')) base_type_info_ptr_ty = Type.pointer(arch, base_type_info_ty) def char_array_ty(length): return Type.array(Type.int(1), strings[0].length) def type_info_ty(kind=None): type_info_struct = Structure() type_info_struct.append(void_p_ty, 'vtable') type_info_struct.append(char_p_ty, 'name') if kind == 'si_class': type_info_struct.append(base_type_info_ptr_ty, 'base_type') return Type.structure_type(type_info_struct) def vtable_ty(vfunc_count): vtable_struct = Structure() vtable_struct.append(signed_int_ty, 'top_offset') vtable_struct.append(base_type_info_ptr_ty, 'typeinfo') vtable_struct.append(Type.array(void_p_ty, vfunc_count), 'functions') return Type.structure_type(vtable_struct) if platform.name.startswith("windows-"): long_size = arch.default_int_size else: long_size = arch.address_size if arch.name.startswith('x86'): char_signed = True else: char_signed = False # not always true short_size = 2 # not always true long_long_size = 8 # not always true ty_for_cxx_builtin = { 'void': Type.void(), 'wchar_t': Type.int(2, sign=char_signed, altname='wchar_t'), 'bool': Type.bool(), 'char': Type.int(1, sign=char_signed), 'signed char': Type.int(1, sign=True), 'unsigned char': Type.int(1, sign=False), 'short': Type.int(short_size, sign=True), 'unsigned short': Type.int(short_size, sign=False), 'int': Type.int(arch.default_int_size, sign=True), 'unsigned int': Type.int(arch.default_int_size, sign=False), 'long': Type.int(long_size, sign=True), 'unsigned long': Type.int(long_size, sign=False), 'long long': Type.int(long_long_size, sign=True), 'unsigned long long': Type.int(long_long_size, sign=False), '__int128': Type.int(16, sign=True), 'unsigned __int128': Type.int(16, sign=False), 'float': Type.float(4), 'double': Type.float(8), '__float80': Type.float(10), '__float128': Type.float(16), 'char32_t': Type.int(4, sign=char_signed, altname='char32_t'), 'char16_t': Type.int(2, sign=char_signed, altname='char16_t'), } def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None): if node.kind == 'builtin': if node.value in ty_for_cxx_builtin: return ty_for_cxx_builtin[node.value] else: return None elif node.kind in ['name', 'qual_name']: named_ty_ref = NamedTypeReference(name=str(node)) return Type.named_type(named_ty_ref) elif node.kind in ['pointer', 'lvalue', 'rvalue']: pointee_ty = ty_from_demangler_node(node.value) if pointee_ty is None: return None is_const = ('const' in cv_qual) is_volatile = ('volatile' in cv_qual) if node.kind == 'pointer': return Type.pointer(arch, pointee_ty, is_const, is_volatile) elif node.kind == 'lvalue': return Type.pointer( arch, pointee_ty, is_const, is_volatile, ref_type=ReferenceType.ReferenceReferenceType) elif node.kind == 'rvalue': return Type.pointer(arch, pointee_ty, is_const, is_volatile, ref_type=ReferenceType.RValueReferenceType) elif node.kind == 'cv_qual': return ty_from_demangler_node(node.value, cv_qual=node.qual) elif node.kind == 'func': is_ctor_dtor = False if node.name and node.name.kind == 'qual_name': qual_name = node.name.value if qual_name[-1].kind in ['ctor', 'dtor']: is_ctor_dtor = True if is_ctor_dtor: ret_ty = Type.void() elif node.ret_ty is not None: ret_ty = ty_from_demangler_node(node.ret_ty) if ret_ty is None: return None else: ret_ty = Type.int(arch.default_int_size).with_confidence(0) arg_nodes = list(node.arg_tys) arg_tys = [] var_arg = False if arg_nodes[-1].kind == 'builtin' and arg_nodes[-1].value == '...': arg_nodes.pop() var_arg = True elif arg_nodes[0].kind == 'builtin' and arg_nodes[ 0].value == 'void': arg_nodes = arg_nodes[1:] this_arg = False if node.name and node.name.kind == 'qual_name': qual_name = node.name.value if is_ctor_dtor or (arg_count_hint is not None and len(arg_nodes) == arg_count_hint - 1): this_arg = True this_node = Node('qual_name', qual_name[:-1]) this_ty = ty_from_demangler_node(this_node) if this_ty is None: return None arg_tys.append(Type.pointer(arch, this_ty)) for arg_node in arg_nodes: arg_ty = ty_from_demangler_node(arg_node) if arg_ty is None: return None arg_tys.append(arg_ty) ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg) if arg_count_hint is not None: # toplevel invocation, so return whether we inferred a this argument return this_arg, ty else: return ty else: log.log_warn("Cannot convert demangled AST {} to a type".format( repr(node))) reader = BinaryReader(view) def read(size): if size == 4: return reader.read32() elif size == 8: return reader.read64() else: assert False symbols = view.get_symbols(start, length) if task: task.set_total(len(symbols)) mangled_re = re.compile('_?_Z') demangler_failures = 0 for symbol in symbols: if task and not task.advance(): break if not mangled_re.match(symbol.raw_name): continue is_data = (symbol.type == SymbolType.DataSymbol) is_code = (symbol.type in [ SymbolType.FunctionSymbol, SymbolType.ImportedFunctionSymbol ]) raw_name, suffix = symbol.raw_name, '' if '@' in raw_name: match = re.match(r'^(.+?)(@.+)$', raw_name) raw_name, suffix = match.group(1), match.group(2) try: name_ast = parse_mangled(raw_name) if name_ast is None: log.log_warn( "Demangler failed to recognize {}".format(raw_name)) demangler_failures += 1 except NotImplementedError as e: log.log_warn("Demangler feature missing on {}: {}".format( raw_name, str(e))) demangler_failures += 1 if name_ast: if name_ast.kind == 'func': short_name = str(name_ast.name) else: short_name = str(name_ast) symbol = Symbol(symbol.type, symbol.address, short_name=short_name + suffix, full_name=str(name_ast) + suffix, raw_name=symbol.raw_name) else: symbol = Symbol(symbol.type, symbol.address, short_name=symbol.raw_name, full_name=None, raw_name=symbol.raw_name) view.define_auto_symbol(symbol) if name_ast is None: continue elif is_data and name_ast.kind == 'typeinfo_name': strings = view.get_strings(symbol.address, 1) if not strings: continue view.define_data_var(symbol.address, char_array_ty(length)) elif is_data and name_ast.kind == 'typeinfo': reader.offset = symbol.address + arch.address_size * 2 kind = None # heuristic: is this is an abi::__si_class_type_info? base_or_flags = read(arch.default_int_size) base_symbol = view.get_symbol_at(base_or_flags) if base_symbol and base_symbol.raw_name.startswith('_ZTI'): kind = 'si_class' view.define_data_var(symbol.address, type_info_ty(kind)) elif is_data and name_ast.kind == 'vtable': vtable_addr = symbol.address reader.offset = vtable_addr + arch.address_size * 2 while True: vfunc_count = 0 check_next = True while True: vfunc_ptr_symbol = view.get_symbol_at(reader.offset) if vfunc_ptr_symbol and vfunc_ptr_symbol.raw_name.startswith( '_Z'): # any C++ symbol definitely terminates the vtable check_next = False break # heuristic: existing function vfunc_addr = read(arch.address_size) if view.get_function_at(vfunc_addr): vfunc_count += 1 continue # explicitly reject null pointers; in position-independent code # address zero can belong to the executable segment if vfunc_addr == 0: check_next = False break # heuristic: pointer to executable memory vfunc_segment = view.get_segment_at(vfunc_addr) if vfunc_addr != 0 and vfunc_segment and vfunc_segment.executable: view.add_function(vfunc_addr) vfunc_count += 1 log.log_info( 'Discovered function at {:#x} via {}'.format( vfunc_addr, symbol.full_name or symbol.short_name)) changed = True continue # we've fell off the end of the vtable break view.define_data_var(vtable_addr, vtable_ty(vfunc_count)) if check_next: # heuristic: can another vtable follow this one? let's see if it has typeinfo, # since that should be always true for when we have a virtual base typeinfo_ptr = read(arch.address_size) typeinfo_ptr_symbol = view.get_symbol_at(typeinfo_ptr) if typeinfo_ptr_symbol and typeinfo_ptr_symbol.raw_name.startswith( '_ZTI'): vtable_addr = reader.offset - 2 * arch.address_size # documentat it with a symbol secondary_symbol_name = '{}_secondary_{:x}'.format( symbol.short_name, vtable_addr - symbol.address) secondary_symbol = Symbol( SymbolType.DataSymbol, vtable_addr, short_name=secondary_symbol_name) view.define_auto_symbol(secondary_symbol) continue break elif is_code and name_ast.kind == 'func': func = view.get_function_at(symbol.address) demangled = ty_from_demangler_node( name_ast, arg_count_hint=len(func.function_type.parameters)) if demangled is not None: this_arg, ty = demangled func.apply_auto_discovered_type(ty) view.update_analysis() if demangler_failures: log.log_warn('{} demangler failures'.format(demangler_failures))
# PART1: Named Types #------------------------------------------------------------------------------ # example: VoidTypeClass typelib.add_named_type('MyVoidType', Type.void()) # example: BoolTypeClass typelib.add_named_type('MyBoolType', Type.bool()) # example: IntegerTypeClass typelib.add_named_type('MyCharType', Type.char()) typelib.add_named_type('MyIntType', Type.int(4, True)) typelib.add_named_type('MyUnsignedIntType', Type.int(4, False)) # example: FloatTypeClass typelib.add_named_type('MyFloatType', Type.float(4)) # example: PointerTypeClass # char * typelib.add_named_type('MyPointerType', Type.pointer(arch, Type.char())) # example of typedef to primitive type # typedef int MyTypedefType; typelib.add_named_type('MyTypedefType', Type.int(4)) # example of typedef to typedef # typedef MyTypedefType MySuperSpecialType; def create_named_type_reference(type_name: str, to_what: NamedTypeReferenceClass): return NamedTypeReferenceType.create(named_type_class=to_what,