def from_address(cls, address: int, self_type: str, view: BinaryView, is_class=False):
    """Parse the ``method_list_t`` at *address* together with its entries.

    Returns ``None`` when *address* is 0. Otherwise the list header is read
    with ``get_structure_members``, a data variable is defined at *address*
    if none exists yet, and one ``Method`` is parsed for each ``method_t``
    slot that follows the header. Parsed methods are keyed by their name.

    :param address: address of the method list (0 means "no list").
    :param self_type: forwarded unchanged to ``Method.from_address``.
    :param view: the BinaryView being analysed.
    :param is_class: forwarded unchanged to ``Method.from_address``.
    :return: ``cls(address, **header_members, methods=...)`` or ``None``.
    """
    if address == 0:
        return None

    list_type = view.get_type_by_name('method_list_t')
    named_list_type = Type.named_type_from_type('method_list_t', list_type)
    entry_type = view.get_type_by_name('method_t')

    if view.get_data_var_at(address) is None:
        view.define_user_data_var(address, named_list_type)

    members = get_structure_members(address, list_type, view)

    # Entries are laid out back to back right after the list header.
    first_entry = address + list_type.width
    past_last_entry = first_entry + members['count'] * entry_type.width
    parsed = (
        Method.from_address(entry_addr, self_type, view, is_class)
        for entry_addr in range(first_entry, past_last_entry, entry_type.width)
    )
    methods = {method.name: method for method in parsed if method is not None}

    return cls(address, **members, methods=methods)
def from_address(cls, address: int, view: BinaryView):
    """Parse the ``property_list_t`` at *address* together with its entries.

    Returns ``None`` when *address* is 0. Otherwise the header is read, a
    data variable is defined at *address* if there is none, and a
    ``Property`` is parsed for every ``property_t`` slot after the header.

    :param address: address of the property list (0 means "no list").
    :param view: the BinaryView being analysed.
    :return: ``cls(address, **header_members, properties=...)`` or ``None``.
    """
    if address == 0:
        return None

    list_type = view.get_type_by_name('property_list_t')
    named_list_type = Type.named_type_from_type('property_list_t', list_type)
    entry_type = view.get_type_by_name('property_t')

    if view.get_data_var_at(address) is None:
        view.define_user_data_var(address, named_list_type)

    members = get_structure_members(address, list_type, view)

    # Entries are laid out back to back right after the list header.
    first_entry = address + list_type.width
    past_last_entry = first_entry + members['count'] * entry_type.width
    parsed = (
        Property.from_address(entry_addr, view)
        for entry_addr in range(first_entry, past_last_entry, entry_type.width)
    )
    properties = {prop.name: prop for prop in parsed if prop is not None}

    return cls(address, **members, properties=properties)
def _parse_function_type(type_string: str, self_name: str, view: BinaryView, is_class=False) -> Type:
    """Turn a method type-encoding string into a function ``Type``.

    The string is consumed left to right: first the return type (an inline
    ``{...}`` structure or a single-character code), then a run of digits,
    then repeated ``<argument type><digits>`` pairs. Inline structure
    arguments are passed as pointers to the parsed structure.

    The slot holding ``self`` is overwritten with a pointer to *self_name*
    (or to ``class_t`` when *is_class* is true). When the return type is a
    named type reference, a leading ``ret_value`` pointer parameter is
    inserted and ``self`` becomes the second parameter.

    :param type_string: encoded method signature; must not be empty.
    :param self_name: name of the type ``self`` points to; looked up in
        ``view.types`` when *is_class* is false.
    :param view: the BinaryView being analysed.
    :param is_class: whether ``self`` should be typed as ``class_t *``.
    :return: the function type built by ``Type.function``.
    """
    log_debug(f'_parse_function_type {type_string}')

    ret_type_str = type_string[0]
    if ret_type_str == '{':
        # Structure defined inline in the encoding.
        ret_type, type_string = _parse_structure(type_string[1:], view)
    else:
        ret_type = _lookup_type(ret_type_str, view)
        type_string = type_string[1:]

    # Digits following the return type; parsed but not used afterwards.
    stack_digits = ''.join(takewhile(str.isdigit, type_string))
    type_string = type_string[len(stack_digits):]
    stack_size = int(stack_digits) if stack_digits else None

    args = []
    while type_string:
        if type_string[0] == '{':
            struct_type, type_string = _parse_structure(type_string[1:], view)
            args.append(Type.pointer(view.arch, struct_type))
        else:
            encoded = ''.join(takewhile(lambda ch: not str.isdigit(ch), type_string))
            type_string = type_string[len(encoded):]
            args.append(_lookup_type(encoded, view))

        # Skip the digits that follow each argument.
        offset_digits = ''.join(takewhile(str.isdigit, type_string))
        type_string = type_string[len(offset_digits):]

    # 'self' is the first parameter unless the return value is passed via a
    # leading pointer parameter (objc_msgSend_stret / objc_msgSendSuper_stret
    # style), in which case it is the second one.
    if ret_type.type_class == TypeClass.NamedTypeReferenceClass:
        log_debug(f'return value is {ret_type}')
        ret_type = Type.pointer(view.arch, ret_type)
        args.insert(0, FunctionParameter(ret_type, 'ret_value'))
        if len(args) < 2:
            args.append(None)
        self_slot = 1
    else:
        self_slot = 0

    if is_class:
        self_target = Type.named_type_from_type('class_t', view.get_type_by_name('class_t'))
    else:
        self_target = Type.named_type_from_type(self_name, view.types[self_name])
    args[self_slot] = FunctionParameter(Type.pointer(view.arch, self_target), 'self')

    return Type.function(ret_type, args)
def from_address(cls, address: int, view: BinaryView):
    """Parse the ``protocol_list_t`` at *address* and the protocols it points to.

    Returns ``None`` when *address* is 0. The header is read, a data variable
    is defined over it, and each pointer-sized slot after the header is typed
    as a pointer to ``protocol_t`` (unless a data variable already exists
    there) and followed with ``Protocol.from_address``.

    Slots holding a null pointer are skipped: ``Protocol.from_address``
    returns ``None`` for address 0, which previously raised
    ``AttributeError`` on ``protocol.name``.

    :param address: address of the protocol list (0 means "no list").
    :param view: the BinaryView being analysed.
    :return: ``cls(address, **header_members, protocols=...)`` or ``None``.
    """
    if address == 0:
        return None

    from_bytes = get_from_bytes(view)

    protocol_list_t_type = view.get_type_by_name('protocol_list_t')
    protocol_list_t = Type.named_type_from_type('protocol_list_t', protocol_list_t_type)
    protocol_t = view.get_type_by_name('protocol_t')

    members = get_structure_members(address, protocol_list_t_type, view)
    view.define_user_data_var(address, protocol_list_t)

    protocols = {}

    # The list body is an array of pointers, one per protocol.
    start = address + protocol_list_t_type.width
    end = start + members['count'] * view.address_size
    step = view.address_size

    for protocol_ptr in range(start, end, step):
        if not view.get_data_var_at(protocol_ptr):
            view.define_user_data_var(protocol_ptr, Type.pointer(view.arch, protocol_t))

        protocol = Protocol.from_address(
            from_bytes(view.read(protocol_ptr, view.address_size)), view)

        # A null entry yields None; there is nothing to register for it.
        if protocol is not None:
            protocols[protocol.name] = protocol

    return cls(address, **members, protocols=protocols)
def from_address(cls, address: int, class_name: str, view: BinaryView):
    """Parse the ``ivar_list_t`` at *address* together with its entries.

    Returns ``None`` when *address* is 0. Otherwise the header is read, a
    data variable is defined over it, and an ``Ivar`` is parsed for every
    ``ivar_t`` slot after the header. Parsed ivars are keyed by name.

    :param address: address of the ivar list (0 means "no list").
    :param class_name: forwarded unchanged to ``Ivar.from_address``.
    :param view: the BinaryView being analysed.
    :return: ``cls(address, **header_members, ivars=...)`` or ``None``.
    """
    if address == 0:
        return None

    # Not used below; the call is retained so behaviour is unchanged.
    from_bytes = get_from_bytes(view)

    list_type = view.get_type_by_name('ivar_list_t')
    named_list_type = Type.named_type_from_type('ivar_list_t', list_type)
    entry_type = view.get_type_by_name('ivar_t')

    members = get_structure_members(address, list_type, view)
    view.define_user_data_var(address, named_list_type)

    # Entries are laid out back to back right after the list header.
    first_entry = address + list_type.width
    past_last_entry = first_entry + members['count'] * entry_type.width
    parsed = (
        Ivar.from_address(entry_addr, class_name, view)
        for entry_addr in range(first_entry, past_last_entry, entry_type.width)
    )
    ivars = {ivar.name: ivar for ivar in parsed}

    return cls(address, **members, ivars=ivars)
def struct_decl(node: Cursor, bv: bn.BinaryView):
    """Define a struct or union declaration as a user type in *bv*.

    An empty structure is registered under the struct's name first so that
    self-referencing fields can resolve it. If *node* is a definition, each
    field is appended to the structure; fields flagged by
    ``is_recursive_field`` get an empty placeholder type registered under
    the pointee's declaration name.

    :param node: libclang cursor for the struct/union declaration.
    :param bv: BinaryView that receives the type definitions.
    :return: ``(struct_name, structure_type)`` on success. If the final
        definition step raises, the error is logged and ``None`` is
        returned implicitly.
    """
    struct = bn.Structure()
    struct.width = node.type.get_size()
    struct.alignment = node.type.get_align()

    # A struct can be defined anonymously and named only through a typedef,
    # in which case the declaration itself has no spelling, e.g.:
    #   typedef struct {
    #       DWORD Version;
    #       GUID Guid;
    #       SYSTEM_POWER_CONDITION PowerCondition;
    #       DWORD DataLength;
    #       BYTE Data[1];
    #   } SET_POWER_SETTING_VALUE, *PSET_POWER_SETTING_VALUE;
    # Fall back to the spelling of the type in that case.
    struct_name = node.spelling if node.spelling else node.type.spelling

    bn.log.log_debug(f'struct_decl: Processing struct {node.spelling}')

    # Always register the name as an empty (forward-declared) structure first
    # to avoid recursion problems with self-referencing structs.
    bv.define_user_type(struct_name, bn.Type.structure_type(bn.Structure()))

    # A declaration that is not a definition is a forward declaration in the
    # source code: no fields are added for it.
    fields = node.type.get_fields() if node.is_definition() else ()
    for field in fields:
        bn.log.log_debug(f'struct_decl: Processing struct field {field.spelling}')
        if is_recursive_field(field, bv):
            placeholder = bn.Structure()
            pointee_name = field.type.get_pointee().get_declaration().spelling
            bv.define_user_type(pointee_name, bn.Type.structure_type(placeholder))
            struct.append(bv.get_type_by_name(pointee_name), pointee_name)
        else:
            field_type = bv.get_type_by_name(field.spelling)
            if not field_type:
                # The field's type is not known yet; define it first.
                _, field_type = define_type(field.get_definition(), bv)
            struct.append(field_type, field.spelling)
        bn.log.log_debug(f'struct_decl: Successfully processed struct field {field.spelling}')

    try:
        if node.kind == CursorKind.UNION_DECL:
            # Unions share this handler; mark the structure accordingly.
            struct.type = bn.StructureType.UnionStructureType
        bv.define_user_type(struct_name, bn.Type.structure_type(struct))
        bn.log.log_debug(f'struct_decl: Successfully processed struct {struct_name}')
        return struct_name, bn.Type.structure_type(struct)
    except Exception as e:
        bn.log.log_debug(f'struct_decl: Failed Processing struct {struct_name} with exception {e}')
def from_address(cls, address: int, view: BinaryView) -> Class:
    """Parse the ``class_t`` at *address*, reusing a cached instance if any.

    Returns ``None`` for address 0 and the cached object when *address* is
    already in ``view.session_data['ClassList']``. A new instance is put in
    that cache *before* its ``isa``/``superclass`` pointers are followed, so
    recursive lookups of the same address get the cached object.

    Besides building the object this defines a data variable and a
    ``_OBJC_CLASS_$_``/``_OBJC_METACLASS_$_`` symbol at *address* and
    registers the class in ``view.session_data['ClassNames']``.

    :param address: address of the class structure (0 means "no class").
    :param view: the BinaryView being analysed.
    :return: the parsed (or cached) class, or ``None``.
    """
    if address == 0:
        return None

    known_classes = view.session_data['ClassList']
    if address in known_classes:
        return known_classes[address]

    new_class = cls(address, view, None, None, None, {}, {})
    view.session_data['ClassList'][address] = new_class

    # Not used below; the call is retained so behaviour is unchanged.
    from_bytes = get_from_bytes(view)

    members = get_structure_members(address, view.get_type_by_name('class_t'), view)

    new_class.isa = Class.from_address(members['isa'], view)
    new_class.superclass = Class.from_address(members['superclass'], view)

    vtable = ClassRO.from_address(members['vtable'], view, new_class.is_meta)
    new_class.vtable = vtable

    class_t = Type.named_type_from_type('class_t', view.get_type_by_name('class_t'))
    view.define_user_data_var(address, class_t)

    if new_class.is_meta:
        view.session_data['ClassNames'][f"{vtable.name}_meta"] = new_class
        symbol_name = f'_OBJC_METACLASS_$_{vtable.name}'
    else:
        view.session_data['ClassNames'][vtable.name] = new_class
        new_class.define_type()
        symbol_name = f'_OBJC_CLASS_$_{vtable.name}'

    view.define_user_symbol(Symbol(SymbolType.DataSymbol, address, symbol_name))

    if vtable and vtable.baseMethods is not None:
        new_class._methods = vtable.baseMethods.methods

    if vtable and vtable.baseProtocols is not None:
        new_class._protocols = vtable.baseProtocols.protocols

    return new_class
def _lookup_type(type_string: str, view: BinaryView):
    """Map one encoded type token to a Binary Ninja type.

    Resolution order:

    * tokens present in ``basic_types`` return that entry;
    * ``*`` is a pointer to char;
    * tokens starting with ``@`` are object pointers: the text between the
      third character and the last one (``type_string[2:-1]``) is treated
      as a type name. A known name gives a pointer to that named type; an
      unknown, non-empty name is first registered in *view* as an empty
      structure; ``@``, ``@?`` and empty names give a pointer to void;
    * ``:`` is the ``SEL`` type looked up in *view*;
    * anything else (including tokens starting with ``#``) is a pointer to
      void.

    :param type_string: the encoded type token.
    :param view: the BinaryView used for lookups and new type definitions.
    :return: the resolved type.
    """
    if type_string in basic_types:
        return basic_types[type_string]

    if type_string == '*':
        return Type.pointer(view.arch, Type.char())

    if type_string.startswith('@'):
        class_name = type_string[2:-1]

        if class_name in view.types:
            return Type.pointer(
                view.arch,
                Type.named_type_from_type(class_name, view.types[class_name]))

        if type_string in ('@?', '@'):
            return Type.pointer(view.arch, Type.void())

        if class_name:
            # Unknown class: register an empty structure under its name.
            new_type = Type.named_type_from_type(
                class_name, Type.structure_type(Structure()))
            view.define_user_type(class_name, new_type)
        else:
            new_type = Type.void()
        return Type.pointer(view.arch, new_type)

    if type_string == ':':
        return view.get_type_by_name('SEL')

    # '#' tokens and every unrecognised token fall back to a void pointer.
    return Type.pointer(view.arch, Type.void())
def define_anonymous_type(node: Cursor, bv: bn.BinaryView) -> bn.Type:
    """Build a structure type for an anonymous struct/union/enum node.

    The anonymous type is given a synthetic name: ``'anon_'`` followed by
    the xxh64 hex digest of ``node.type.spelling``. Each field is appended
    to a new structure, defining the field's type through ``define_type``
    when a lookup by the field's spelling finds nothing.

    Unlike the named handlers, this function does not register the
    resulting type in *bv* itself.

    :param node: libclang cursor of the anonymous declaration.
    :param bv: BinaryView used to look up and define field types.
    :return: ``(struct_name, structure_type)``. Note that this is a tuple
        even though the annotation says ``bn.Type``.
    """
    bn.log.log_debug(f'define_anonymous_type: Processing {node.type.spelling}')

    struct = bn.Structure()
    struct.width = node.type.get_size()
    struct.alignment = node.type.get_align()
    struct_name = f'anon_{xxhash.xxh64_hexdigest(node.type.spelling)}'

    for field in node.type.get_fields():
        field_name = field.spelling
        bn_field_type = bv.get_type_by_name(field.spelling)
        if not bn_field_type:
            # The field's type is not known yet; define it first and use the
            # name that the handler reports.
            field_name, bn_field_type = define_type(field.get_definition(), bv)
        bn.log.log_debug(f'define_anonymous_type: Appending field - {bn_field_type} {field_name}')
        struct.append(bn_field_type, field_name)

    # An elaborated type whose declaration is a union is marked as a union.
    is_union = (
        node.type.kind == TypeKind.ELABORATED
        and node.type.get_named_type().get_declaration().kind == CursorKind.UNION_DECL
    )
    if is_union:
        struct.type = bn.StructureType.UnionStructureType

    return struct_name, bn.Type.structure_type(struct)
def pp(bv: bn.BinaryView):
    """Parse the first ntdll header with libclang and export a type library.

    Two passes are made over the top-level nodes of the translation unit:
    the first defines every node in *bv* through ``ast_handlers.define_type``;
    the second exports each node whose spelling resolves to a type in *bv*
    into a new ``ntdll.dll`` type library for x86 / windows-x86. The library
    is then finalized and written to ``ntdll_type_lib.btl`` under the
    configured processed-header folder.

    :param bv: BinaryView used as the working type container.
    """
    pre_define_types(bv, ntdll)
    Config.set_library_file(directories_config.libclang_library_file)

    index: Index = Index.create()
    # ntdll.pre_proccessor_args are the arguments for the clang parser.
    tu: TranslationUnit = index.parse(ntdll.header_list[0], args=ntdll.pre_proccessor_args)
    root_node = tu.cursor

    # Pass 1: define every top-level declaration in the BinaryView.
    for node in root_node.get_children():
        bn.log.log_debug(
            f'{"*" * 30}\nDEFINING NODE: \n {node.spelling} {node.type.spelling} \n'
            f'node.kind: {node.kind}, node.type.kind: {node.type.kind}\n {"*" * 30}'
        )
        ast_handlers.define_type(node, bv)

    # Pass 2: create the type library from the parsed types.
    type_library = bn.TypeLibrary.new(bn.Architecture["x86"], "ntdll.dll")
    type_library.add_platform(bn.Platform["windows-x86"])

    for node in root_node.get_children():
        bn.log.log_debug(
            f'{"*" * 30}\nEXPORTING NODE: \n {node.spelling} {node.type.spelling} \n'
            f'node.kind: {node.kind}, node.type.kind: {node.type.kind}\n {"*" * 30}'
        )
        defined_type = bv.get_type_by_name(node.spelling)
        if not isinstance(defined_type, bn.Type):
            continue
        bv.export_type_to_library(type_library, node.spelling, defined_type)

    type_library.finalize()
    type_library.write_to_file(directories_config.base_proccessed_header_folder + 'ntdll_type_lib.btl')
def from_address(cls, address: int, view: BinaryView, is_meta=False):
    """Parse the ``class_ro_t`` at *address*, reusing a cached instance if any.

    Returns ``None`` for address 0. When *address* is already present in
    ``view.session_data['ClassROList']`` the cached object for that address
    is returned (previously the whole cache dictionary was returned by
    mistake). Otherwise every member of ``class_ro_t`` is read as an
    integer, the name/ivarLayout pointers are replaced by the strings they
    point to, the list pointers are replaced by parsed list objects, and the
    new instance is stored in the cache.

    :param address: address of the structure (0 means "none").
    :param view: the BinaryView being analysed.
    :param is_meta: forwarded to ``MethodList.from_address`` as its
        ``is_class`` argument.
    :return: the parsed (or cached) object, or ``None``.
    """
    if address == 0:
        return None

    cache = view.session_data['ClassROList']
    if address in cache:
        return cache[address]

    # Imported here, after the early returns, so it is only paid for when a
    # structure actually has to be parsed.
    from .protocol_t import ProtocolList

    from_bytes = partial(
        int.from_bytes,
        byteorder=("little" if view.endianness == Endianness.LittleEndian else "big"))

    class_ro_t = Type.named_type_from_type(
        'class_ro_t', view.get_type_by_name('class_ro_t'))

    if view.get_data_var_at(address) is None:
        view.define_user_data_var(address, class_ro_t)

    # Read every structure member as a raw integer first.
    members = {
        m.name: from_bytes(view.read(address + m.offset, m.type.width))
        for m in view.get_type_by_name('class_ro_t').structure.members
    }

    # Replace string pointers with the strings themselves ('' for null).
    members['name'] = (view.get_ascii_string_at(members['name'], 1).value
                       if members['name'] != 0 else '')
    members['ivarLayout'] = (view.get_ascii_string_at(members['ivarLayout'], 1).value
                             if members['ivarLayout'] != 0 else '')

    # Replace list pointers with parsed list objects (None for null).
    members['ivars'] = IvarList.from_address(members['ivars'], members['name'], view)
    members['baseMethods'] = MethodList.from_address(
        members['baseMethods'], members['name'], view, is_meta)
    members['baseProtocols'] = ProtocolList.from_address(members['baseProtocols'], view)
    members['baseProperties'] = PropertyList.from_address(members['baseProperties'], view)

    new_class_ro = cls(address, **members)
    view.session_data['ClassROList'][address] = new_class_ro
    return new_class_ro
def typedef_decl(node: Cursor, bv: bn.BinaryView):
    """Define a typedef in *bv* by parsing its type as a string.

    If a type named ``node.spelling`` already exists it is returned as is.
    Otherwise the type string is parsed with ``bv.parse_type_string`` and the
    result is registered with ``bv.define_user_type``.

    :param node: libclang cursor of the typedef.
    :param bv: BinaryView that receives the type definition.
    :return: ``(name, type)`` on success. When parsing or defining fails the
        failure is logged at debug level and ``None`` is returned
        implicitly.
    """
    bn.log.log_debug(f'typedef_decl: {node.underlying_typedef_type.spelling} {node.spelling}, \n'
                     f'underlying_typedef_type: {node.underlying_typedef_type.kind}')
    if node.spelling and bv.get_type_by_name(node.spelling):
        bn.log.log_debug(f'typedef_decl: Type already defined')
        return node.spelling, bv.get_type_by_name(node.spelling)
    elif not node.underlying_typedef_type.spelling:
        # No underlying spelling available: parse the node's own type instead.
        try:
            var_type, name = bv.parse_type_string(f'{node.type.spelling} {node.spelling}')
        except Exception as e:
            # var_type/name stay unbound here; the final try block below then
            # fails on them and logs a second message.
            bn.log.log_debug(f'typedef_decl: Failed to parse {node.type.spelling} {node.spelling}, with exception {e}')
    else:
        # Sanitize the type - remove any compiler directives such as __aligned and such.
        underlying_typedef_type_string = remove_compiler_directives(node.underlying_typedef_type.spelling)
        try:
            var_type, name = bv.parse_type_string(f'{underlying_typedef_type_string}')
            # The reason we are not using the name inside the parsed string is that sometimes you get a typedef
            # like 'int [1] td', and parsing it like that raises a binaryNinja exception.
            # Instead we parse 'int [1]' and attach the name of the typedef to it afterwards.
            name = node.spelling
            bn.log.log_debug(f'typedef_decl: Successfully parsed {underlying_typedef_type_string} {node.spelling}')
        except SyntaxError as se:
            if 'syntax error' in str(se):
                if node.spelling.endswith('_t'):
                    # Some variable names are internal to binaryNinja and cannot be used. These names usually
                    # end with _t, for example size_t \ ptrdiff_t etc.
                    # In order to not clash with the internal names, change the _t to _T.
                    altered_spelling = node.spelling[:-1] + 'T'
                    var_type, name = bv.parse_type_string(f'{underlying_typedef_type_string} {altered_spelling}')
            elif 'is not defined' in str(se):
                # NOTE(review): define_user_type is called with (name, type)
                # everywhere else in this file; this single-argument call
                # unpacked into two names looks unintended - confirm.
                var_type, name = bv.define_user_type(underlying_typedef_type_string)
            else:
                bn.log.log_debug(f'typedef_decl: Failed to parse {node.underlying_typedef_type.spelling} '
                                 f'{node.spelling}')
    # Any failure above leaves name/var_type unbound; the resulting exception
    # is caught here, so the function logs and returns None instead of raising.
    try:
        bv.define_user_type(name, var_type)
        bn.log.log_debug(f'typedef_decl: Successfully processed {node.underlying_typedef_type.spelling} '
                         f'{node.spelling}')
        return str(name), var_type
    except Exception as e:
        bn.log.log_debug(f'typedef_decl: Failed Processing {node.underlying_typedef_type.spelling} '
                         f'{node.spelling} with exception {e}')
def from_address(cls, address: int, view: BinaryView) -> Protocol:
    """Parse the ``protocol_t`` at *address*.

    Returns ``None`` for address 0. Every structure member is read as an
    integer; ``isa`` is then resolved to a class, ``name`` to a string ('' if
    the pointer is null), and ``protocols``, ``instanceMethods`` and
    ``optionalInstanceMethods`` to parsed list objects. If no type with the
    protocol's name exists in *view*, an empty structure is registered under
    that name. The result is also stored in
    ``view.session_data['Protocols']`` under its name.

    :param address: address of the protocol structure (0 means "none").
    :param view: the BinaryView being analysed.
    :return: the parsed protocol, or ``None``.
    """
    if address == 0:
        return None

    from .class_t import Class

    byte_order = "little" if view.endianness == Endianness.LittleEndian else "big"
    from_bytes = partial(int.from_bytes, byteorder=byte_order)

    protocol_t = Type.named_type_from_type(
        'protocol_t', view.get_type_by_name('protocol_t'))

    if not view.get_data_var_at(address):
        view.define_user_data_var(address, protocol_t)

    # Read every structure member as a raw integer first.
    members = {}
    for member in view.get_type_by_name('protocol_t').structure.members:
        raw = view.read(address + member.offset, member.type.width)
        members[member.name] = from_bytes(raw)

    members['isa'] = Class.from_address(members['isa'], view)

    if members['name'] != 0:
        members['name'] = view.get_ascii_string_at(members['name'], 1).value
    else:
        members['name'] = ''

    # Make sure a type with the protocol's name exists before its methods
    # are parsed with that name as their self type.
    if members['name'] not in view.types:
        view.define_user_type(members['name'], Type.structure_type(Structure()))

    members['protocols'] = ProtocolList.from_address(members['protocols'], view)

    for key in ('instanceMethods', 'optionalInstanceMethods'):
        members[key] = MethodList.from_address(members[key], members['name'], view)

    new_protocol = cls(address, **members)
    view.session_data['Protocols'][new_protocol.name] = new_protocol
    return new_protocol
def from_address(cls, address: int, class_name: str, view: BinaryView):
    """Parse the ``ivar_t`` at *address*.

    Returns ``None`` for address 0. The raw members are read and then
    resolved:

    * ``name`` becomes the string it points to, or ``''`` when the pointer
      is null or no string can be read there (previously the raw integer
      could be left in place);
    * ``type`` becomes the type decoded from the string it points to, or a
      pointer to void when the pointer is null, no string can be read, or
      the decoded type is falsy (previously a missing string raised
      ``AttributeError``);
    * ``offset`` is a pointer to the offset value: a data variable and a
      ``<name>_offset`` symbol (in the *class_name* namespace) are defined
      at the pointed-to location and the value stored there replaces the
      pointer. A null pointer gives ``None``.

    :param address: address of the ivar structure (0 means "none").
    :param class_name: namespace used for the offset symbol.
    :param view: the BinaryView being analysed.
    :return: ``cls(address, **members)`` or ``None``.
    """
    if address == 0:
        return None

    from_bytes = get_from_bytes(view)

    ivar_t_type = view.get_type_by_name('ivar_t')
    ivar_t = Type.named_type_from_type('ivar_t', ivar_t_type)

    members = get_structure_members(address, ivar_t_type, view)
    member_dict = {m.name: m for m in ivar_t_type.structure.members}

    # x64 uses uint64_t for offset, but everything else uses uint32_t.
    ivar_offset_type = (member_dict['offset'].type.target
                        if view.arch != Architecture['x86_64']
                        else Type.int(8, False))
    ivar_offset_type.const = True

    if view.get_data_var_at(address) is None:
        view.define_user_data_var(address, ivar_t)

    # Name: always end up with a str so later formatting never sees an int.
    name_string = (view.get_ascii_string_at(members['name'], 1)
                   if members['name'] else None)
    members['name'] = name_string.value if name_string is not None else ''

    # Type: decode the encoding string when one is available.
    type_string = (view.get_ascii_string_at(members['type'], 1)
                   if members['type'] else None)
    members['type'] = (_lookup_type(type_string.value, view)
                       if type_string is not None else None)
    if not members['type']:
        members['type'] = Type.pointer(view.arch, Type.void())

    if members['offset']:
        view.define_user_data_var(members['offset'], ivar_offset_type)
        view.define_user_symbol(
            Symbol(SymbolType.DataSymbol,
                   members['offset'],
                   f'{members["name"]}_offset',
                   namespace=class_name))
        # NOTE(review): the value is read with the structure member's target
        # width even when the 8-byte type was selected above - confirm.
        members['offset'] = from_bytes(
            view.read(members['offset'], member_dict['offset'].type.target.width))
    else:
        members['offset'] = None

    return cls(address, **members)
def define_cfstrings_plugin(view: BinaryView):
    """Type every entry of the ``__cfstring`` section and its backing buffer.

    Makes sure the ``CFString`` type exists (parsing it from
    ``_cfstring_definition`` if needed), then walks the ``__cfstring``
    section in ``CFString``-sized steps. For each entry it defines a
    ``CFString`` data variable, types every data reference to the entry as a
    pointer to ``CFString``, and defines a character array over the string
    buffer. The array element type is the parsed ``wchar`` type when the
    buffer lies in a section named ``__ustring`` and ``char`` otherwise.

    An entry whose buffer pointer is not inside any section is skipped;
    previously it ended the whole pass, leaving later entries untyped.

    :param view: the BinaryView being analysed. Nothing is done when it has
        no ``__cfstring`` section.
    """
    log_debug("define_cfstrings_plugin")

    from_bytes = _get_from_bytes(view)

    cfstring_type = view.get_type_by_name('CFString')
    if cfstring_type is None:
        cfstring_type = view.platform.parse_types_from_source(
            _cfstring_definition).types['CFString']
        view.define_user_type('CFString', cfstring_type)

    wchar_type = view.platform.parse_types_from_source(
        _wchar_definition).types['wchar']

    cfstring = Type.named_type_from_type('CFString', cfstring_type)

    __cfstring = view.get_section_by_name('__cfstring')
    if __cfstring is None:
        return

    buffer = cfstring_type.structure['buffer']
    length = cfstring_type.structure['length']

    for addr in range(__cfstring.start, __cfstring.end, cfstring_type.width):
        view.define_user_data_var(addr, cfstring)

        for xref in view.get_data_refs(addr):
            view.define_user_data_var(xref, Type.pointer(view.arch, cfstring))

        string_pointer = from_bytes(
            view.read(addr + buffer.offset, buffer.type.width))

        # One extra element is reserved beyond the stored length.
        string_length = from_bytes(
            view.read(addr + length.offset, length.type.width)) + 1

        string_section = view.get_sections_at(string_pointer)
        if not string_section:
            # Buffer is outside every section: skip this entry only.
            continue

        if string_section[0].name == '__ustring':
            char_type = wchar_type
        else:
            char_type = Type.char()

        view.define_user_data_var(string_pointer,
                                  Type.array(char_type, string_length))
def from_address(cls, address: int, self_type: str, view: BinaryView, is_class=False):
    """Parse the ``method_t`` at *address* and annotate its implementation.

    Returns ``None`` for address 0. If no type named *self_type* exists in
    *view*, an empty structure is registered under that name. The ``name``
    pointer is resolved to a string, ``types`` is passed through
    ``parse_function_type`` and ``imp`` is resolved to the function at that
    address. When such a function exists it gets a ``-[Type name]`` (or
    ``+[Type name]`` when *is_class* is true) symbol and, if a function type
    was produced, that function type.

    :param address: address of the method structure (0 means "none").
    :param self_type: name of the type the method belongs to.
    :param view: the BinaryView being analysed.
    :param is_class: selects the ``+`` prefix and is forwarded to
        ``parse_function_type``.
    :return: ``cls(address, **members)`` or ``None``.
    """
    if address == 0:
        return None

    if self_type not in view.types:
        view.define_user_type(self_type, Type.structure_type(Structure()))

    method_t_type = view.get_type_by_name('method_t')
    method_t = Type.named_type_from_type('method_t', method_t_type)

    if view.get_data_var_at(address) is None:
        view.define_user_data_var(address, method_t)

    members = get_structure_members(address, method_t_type, view)

    if members['name']:
        members['name'] = view.get_ascii_string_at(members['name'], 1).value
    else:
        members['name'] = ''

    if members['types']:
        encoded_types = view.get_ascii_string_at(members['types'], 1).value
    else:
        encoded_types = ''
    members['types'] = parse_function_type(encoded_types, self_type, view, is_class)

    implementation = view.get_function_at(members['imp'])
    members['imp'] = implementation

    if implementation is not None:
        prefix = '+' if is_class else '-'
        method_name = f'{prefix}[{self_type} {members["name"]}]'

        # If the name is already taken, put this symbol in a namespace named
        # after the implementation's start address.
        if view.symbols.get(method_name):
            namespace = f'{implementation.start}'
        else:
            namespace = None

        view.define_user_symbol(
            Symbol(SymbolType.FunctionSymbol,
                   implementation.start,
                   method_name,
                   namespace=namespace))

        if members['types'] is not None:
            implementation.function_type = members['types']

    return cls(address, **members)
def _add_xrefs(view: BinaryView):
    """Add code references from selector uses to the functions they name.

    For every function, the data variables that reference its start address
    are read as ``method_t`` entries. The entry's ``name`` pointer is read,
    and for each code reference to that name the mapped medium-level IL
    instruction is inspected: if it is a ``MLIL_SET_VAR``, the first later
    use of the assigned SSA variable in the same basic block is taken as the
    call site, and a user code reference from that call site to the function
    is added.

    Code references that cannot be analysed (no IL at the address, or an
    instruction that is not ``MLIL_SET_VAR``) are skipped individually;
    previously the first such reference ended the entire pass. Data
    references that have no data variable are ignored instead of raising
    ``AttributeError``.

    :param view: the BinaryView being analysed. Nothing is done when the
        ``method_t`` type is not defined.
    """
    log_debug('_add_xrefs')

    method_t = view.get_type_by_name('method_t')
    if method_t is None:
        return

    method_t_struct = method_t.structure
    method_t_name = method_t_struct['name']
    byte_order = "little" if view.endianness == Endianness.LittleEndian else "big"

    for function in view.functions:
        data_refs = view.get_data_refs(function.start)
        log_debug(f'{function.name}: {data_refs}')

        # NOTE(review): every data variable referencing the function is
        # treated as a method_t; the variable's type is not checked.
        method_t_list = [
            var for var in map(view.get_data_var_at, data_refs)
            if var is not None
        ]
        log_debug(f'{function.name}: {method_t_list}')

        for method in method_t_list:
            name_ptr = int.from_bytes(
                view.read(method.address + method_t_name.offset, view.address_size),
                byte_order)

            for xref in view.get_code_refs(name_ptr):
                llil = xref.function.get_low_level_il_at(xref.address)
                xref_mlil = llil.mmlil if llil is not None else None

                if xref_mlil is None:
                    log_debug(f'{xref.address:x}')
                    continue

                if xref_mlil.operation != MediumLevelILOperation.MLIL_SET_VAR:
                    continue

                # First later use of the assigned variable within the same
                # basic block.
                call_mlil = next(
                    (use
                     for use in xref_mlil.function.get_ssa_var_uses(xref_mlil.ssa_form.dest)
                     if (use.instr_index > xref_mlil.instr_index
                         and use.il_basic_block == xref_mlil.il_basic_block)),
                    None)

                if call_mlil is not None:
                    xref.function.add_user_code_ref(call_mlil.address, function.start)
def _define_selectors(view: BinaryView): __objc_selrefs = view.sections.get('__objc_selrefs') if __objc_selrefs is None: raise KeyError('This binary has no __objc_selrefs section') SEL = view.get_type_by_name('SEL') if SEL is None: raise TypeError('The SEL type is not defined!') for addr in range(__objc_selrefs.start, __objc_selrefs.end, SEL.width): view.define_user_data_var(addr, SEL) selector = int.from_bytes(view.read(addr, SEL.width), "little") if selector != 0: name = view.get_ascii_string_at(selector, 3) if name is not None: view.define_user_data_var( name.start, Type.array(Type.char(), name.length + 1))
def _define_protocols(view: BinaryView): __objc_protorefs = view.get_section_by_name('__objc_protorefs') if __objc_protorefs is None: return protocol_t = Type.named_type_from_type('protocol_t', view.get_type_by_name('protocol_t')) for address in range(__objc_protorefs.start, __objc_protorefs.end, view.address_size): view.define_user_data_var(address, Type.pointer(view.arch, protocol_t)) protocol_ptr = int.from_bytes( view.read(address, view.address_size), "little" if view.endianness is Endianness.LittleEndian else "big") new_protocol = Protocol.from_address(protocol_ptr, view)
def from_address(cls, address: int, view: BinaryView):
    """Parse the ``property_t`` at *address*.

    Returns ``None`` for address 0 or when the ``property_t`` type is not
    defined in *view*. A data variable is defined at *address* if none
    exists, and the ``name`` and ``attributes`` pointers are replaced by the
    strings they point to ('' for a null pointer).

    :param address: address of the property structure (0 means "none").
    :param view: the BinaryView being analysed.
    :return: ``cls(address, **members)`` or ``None``.
    """
    if address == 0:
        return None

    property_t_type = view.get_type_by_name('property_t')
    if property_t_type is None:
        return

    property_t = Type.named_type_from_type('property_t', property_t_type)
    if view.get_data_var_at(address) is None:
        view.define_user_data_var(address, property_t)

    members = get_structure_members(address, property_t_type, view)

    # Both fields are string pointers; resolve them in declaration order.
    for key in ('name', 'attributes'):
        pointer = members[key]
        if pointer:
            members[key] = view.get_ascii_string_at(pointer, 1).value
        else:
            members[key] = ''

    return cls(address, **members)
def _define_categories(view: BinaryView): __objc_catlist = view.sections.get('__objc_catlist') if __objc_catlist is None: return category_t = Type.named_type_from_type('category_t', view.get_type_by_name('category_t')) if category_t is None: return start = __objc_catlist.start end = __objc_catlist.end step = view.address_size for address in range(start, end, step): view.define_user_data_var(address, Type.pointer(view.arch, category_t)) category_ptr = int.from_bytes( view.read(address, view.address_size), "little" if view.endianness is Endianness.LittleEndian else "big") new_category = Category.from_address(category_ptr, view)
def from_address(cls, address: int, view: BinaryView) -> Category:
    """Parse the ``category_t`` at *address* and attach its methods to its class.

    Returns ``None`` for address 0 or when the ``category_t`` type is not
    defined in *view*. The category's class is resolved through
    ``Class.from_address``; when that yields nothing, the class is looked up
    (or created as a placeholder) by the name of the symbol found at the
    ``cls`` field. The category's instance methods are then merged into the
    class's method table.

    :param address: address of the category structure (0 means "none").
    :param view: the BinaryView being analysed.
    :return: ``cls(address, **members)`` or ``None``.
    """
    if address == 0:
        return None

    from .class_t import Class, ClassRO

    category_t_type = view.get_type_by_name('category_t')

    if category_t_type is None:
        return

    category_t = Type.named_type_from_type('category_t', category_t_type)

    if view.get_data_var_at(address) is None:
        view.define_user_data_var(address, category_t)

    members = get_structure_members(address, category_t_type, view)

    # Resolve the name pointer to a string ('' for a null pointer).
    members['name'] = (view.get_ascii_string_at(members['name'], 1).value
                       if members['name'] else '')

    members['cls'] = Class.from_address(members['cls'], view)

    if members['cls'] is None:
        # No class structure behind the pointer: fall back to the symbol
        # located at the 'cls' field itself, or to the category name.
        cls_offset = next(m.offset
                          for m in category_t_type.structure.members
                          if m.name == 'cls')
        cls_name = view.get_symbol_at(address + cls_offset)
        cls_name = cls_name.name if cls_name is not None else members['name']

        # Strip the _OBJC_CLASS_$_ / _OBJC_METACLASS_$_ decoration.
        class_match = re.match(
            r'_OBJC_(META)?CLASS_\$_(?P<classname>[_A-Za-z0-9=/]+)(@GOT)?',
            cls_name)
        if class_match is not None:
            cls_name = class_match.group('classname')
            cls_ = view.session_data['ClassNames'].get(cls_name)
            if cls_ is None:
                # Unknown class: register a placeholder carrying only a name.
                # NOTE(review): the 11 None arguments presumably match the
                # remaining ClassRO constructor parameters - confirm.
                cls_ = Class(None, view, None, None, None, {}, {})
                cls_.vtable = ClassRO(address, *([None] * 11))
                cls_.vtable.name = cls_name
                view.session_data['ClassNames'][cls_name] = cls_

            members['cls'] = cls_
    else:
        cls_name = members['cls'].vtable.name

    members['instanceMethods'] = MethodList.from_address(
        members['instanceMethods'], cls_name, view)

    # Merge the category's methods into the class: replace an empty method
    # table outright, otherwise update the existing one.
    if members['cls'] is not None and not members['cls'].methods:
        if members['instanceMethods'] is None:
            members['cls']._methods = {}
        else:
            members['cls']._methods = members['instanceMethods'].methods
    elif members['cls'] is not None and members['instanceMethods']:
        members['cls']._methods.update(members['instanceMethods'].methods)

    members['protocols'] = ProtocolList.from_address(members['protocols'], view)

    return cls(address, **members)
def pointer_type(node: Cursor, bv: bn.BinaryView):
    """Define a pointer type (or a typedef of one) as a user type in *bv*.

    The pointee type is taken from the node's type (or from the typedef's
    underlying type), converted to a Binary Ninja type - defining it first
    when necessary - and wrapped in a pointer that is registered under
    ``node.spelling``.

    :param node: libclang cursor whose type is a pointer or a typedef.
    :param bv: BinaryView that receives the type definitions.
    :return: ``(node.spelling, pointer_type)``, or ``None`` when the node's
        type kind is neither TYPEDEF nor POINTER.
    """
    bn.log.log_debug(f'pointer_type: {node.type.spelling} {node.spelling}, \n'
                     f'node.type.kind: {node.type.kind} \n')
    if node.type.kind == TypeKind.TYPEDEF:
        pointee_type = node.underlying_typedef_type.get_pointee()
    elif node.type.kind == TypeKind.POINTER:
        pointee_type = node.type.get_pointee()
    else:
        bn.log.log_debug(f'pointer_type: Unhandled node type: {node.type.kind}')
        return
    if check_if_base_type(pointee_type):
        pointee_type_spelling = pointee_type.spelling
        if pointee_type_spelling in void_types:
            # BinaryNinja can't parse the expression 'const void'.
            pointee_type_spelling = 'void'
        # If it's a base type then there is no need to define the pointee type.
        bn.log.log_debug(f'pointer_type: Parsing type string: {pointee_type_spelling}')
        bn_pointee_type, name = bv.parse_type_string(pointee_type_spelling)
        pointer = bn.Type.pointer(bv.arch, bn_pointee_type)
    else:
        pointee_node = pointee_type.get_declaration()
        if pointee_node.kind == CursorKind.NO_DECL_FOUND:
            # Some types of TypeKind.TYPEDEF have no declaration node because the type is just a pointer.
            # example: typedef EXCEPTION_ROUTINE *PEXCEPTION_ROUTINE;
            bn.log.log_debug(f'pointer_type: No declaration found for: {pointee_type.spelling} \n'
                             f' pointee_type.kind: {pointee_type.kind}')
            if pointee_type.kind == TypeKind.FUNCTIONPROTO:
                # A special case happens when a type is a typedef for a function pointer - the function might be
                # an anonymous function that was not previously defined, so we must define it first (can't just
                # parse the string with parse_type_string()).
                # Example: typedef void
                #          (__stdcall *PIMAGE_TLS_CALLBACK) (
                #              PVOID DllHandle,
                #              DWORD Reason,
                #              PVOID Reserved
                #          );
                bn_pointee_name, bn_pointee_type = function_decl(node, bv)
                pointer = bn.Type.pointer(bv.arch, bn_pointee_type)
            elif pointee_type.kind == TypeKind.FUNCTIONNOPROTO:
                # FUNCTIONNOPROTO means there are no arguments, only a possible return type
                pointee_result_type = pointee_type.get_result()
                if check_if_base_type(pointee_result_type):
                    # Result is a base type, thus no declaration node.
                    # Example: long ()
                    pointee_result_string = pointee_result_type.spelling
                    if pointee_result_string in void_types:
                        pointee_result_string = 'void'
                    bn_result_type, bn_result_name = bv.parse_type_string(pointee_result_string)
                else:
                    result_type = pointee_type.get_result().get_declaration()
                    bn_result_name, bn_result_type = define_type(result_type, bv)
                pointer = bn.Type.pointer(bv.arch, bn.Type.function(bn_result_type, []))
            elif pointee_type.kind == TypeKind.POINTER:
                # we are dealing with a pointer to a pointer
                # NOTE(review): the branches below disagree on what
                # bn_pointee_type represents (the inner pointee vs. the
                # pointee itself) before the single wrap at the end of this
                # branch - confirm the resulting pointer depth for each.
                if check_if_base_type(pointee_type.get_pointee()):
                    type_string = pointee_type.get_pointee().spelling
                    if type_string in void_types:
                        type_string = 'void'
                    bn_pointee_type, bn_pointee_name = bv.parse_type_string(type_string)
                elif pointee_type.get_pointee().kind == TypeKind.POINTER:
                    # We have multiple nested pointers.
                    # Example: int ****a;
                    # The problem here is that if the pointee type is also a pointer, then it has no declaration node,
                    # so we can't call pointer_type() on it directly.
                    nested_pointer_count = 1
                    current_pointer_type = pointee_type.get_pointee()
                    while current_pointer_type.kind == TypeKind.POINTER:
                        nested_pointer_count += 1
                        current_pointer_type = current_pointer_type.get_pointee()
                    if check_if_base_type(current_pointer_type):
                        bn_pointee_type, bn_pointee_name = bv.parse_type_string(current_pointer_type.spelling)
                    else:
                        # NOTE(review): define_type is given a clang type
                        # here, while every other call site passes a
                        # declaration cursor - confirm.
                        bn_pointee_name, bn_pointee_type = define_type(current_pointer_type, bv)
                    # NOTE(review): one wrap here, nested_pointer_count wraps
                    # in the loop, one more after it and one at the end of
                    # this branch; that appears to be more levels than the
                    # declaration has - TODO confirm.
                    temp_bn_pointer_type = bn.Type.pointer(bv.arch, bn_pointee_type)
                    for nesting_level in range(nested_pointer_count):
                        temp_bn_pointer_type = bn.Type.pointer(bv.arch, temp_bn_pointer_type)
                    bn_pointee_type = bn.Type.pointer(bv.arch, temp_bn_pointer_type)
                elif pointee_type.get_pointee().get_declaration().kind == CursorKind.NO_DECL_FOUND:
                    # For some reason there is no declaration of the pointee.
                    # Manually parse the type and hope it was previously defined.
                    # TODO: Find a way to handle a case where the type was not already defined.
                    # NOTE(review): this writes to stdout rather than the
                    # Binary Ninja log used everywhere else.
                    print(f'pointee_type.get_pointee().get_named_type().kind: {pointee_type.get_pointee().get_named_type().kind}')
                    # The reason I am parsing the pointee_type and not pointee_type.get_pointee() is that in some
                    # cases the pointer is pointing to a function prototype that has no declaration, and it is much
                    # easier to just parse the pointer to a known type than to parse the underlying type.
                    bn_pointee_type, bn_pointee_name = bv.parse_type_string(pointee_type.spelling)
                else:
                    bn_pointee_name, bn_pointee_type = define_type(pointee_type.get_pointee().get_declaration(), bv)
                pointer = bn.Type.pointer(bv.arch, bn_pointee_type)
            else:
                # Fallback: parse the typedef's underlying type spelling and
                # wrap the result in a pointer.
                bn_pointee_type, bn_pointee_name = bv.parse_type_string(node.underlying_typedef_type.spelling)
                pointer = bn.Type.pointer(bv.arch, bn_pointee_type)
        else:
            bn_pointee_type = bv.get_type_by_name(pointee_node.spelling)
            if bn_pointee_type is None:
                # need to define the pointee type before declaring the pointer
                bn_pointee_name, bn_pointee_type = define_type(pointee_node, bv)
                pointer = bn.Type.pointer(bv.arch, bn_pointee_type)
            else:
                # type already defined in the binaryView.
                pointer = bn.Type.pointer(bv.arch, bn_pointee_type)
    bv.define_user_type(node.spelling, pointer)
    bn.log.log_debug(f'pointer_type: Successfully defined : {node.spelling}')
    return node.spelling, pointer
def perform_get_lines_for_data(self, ctxt, view: BinaryView, addr: int, type_: Type, prefix: list, width: int, context):
    """Render the CFString at *addr* as a single annotated disassembly line.

    The line has the form ``CFString {<info>} <name> = <string>``, where
    ``<info>`` is decoded from the structure's ``info`` field, ``<name>`` is
    the short name of the symbol at *addr* (or ``data_<addr>``), and
    ``<string>`` is the ``repr`` of the buffer contents. When the decoded
    info text contains ``U`` the buffer is read as ``length * 2`` raw bytes;
    otherwise it is read as an ASCII string.

    If the ``CFString`` type is missing, or no ASCII string can be read at
    the buffer, a line holding just the incoming *prefix* is returned.

    :param view: the BinaryView being rendered.
    :param addr: address of the CFString structure.
    :param prefix: tokens used as-is for the fallback line.
    :return: a one-element list with the ``DisassemblyTextLine``.
    """
    from_bytes = _get_from_bytes(view)
    symbol = view.get_symbol_at(addr)

    cfstring_type = view.get_type_by_name('CFString')
    if cfstring_type is None:
        log_debug('CFString is not defined; how did we even get here?')
        return [DisassemblyTextLine(prefix, addr)]

    layout = cfstring_type.structure

    def read_field(field_name, size):
        # Read one structure field of the CFString as an integer.
        return from_bytes(view.read(addr + layout[field_name].offset, size))

    buffer = read_field('buffer', view.address_size)
    info = read_field('info', layout['info'].type.width)
    length = read_field('length', layout['length'].type.width)

    low_byte = info & 0xff
    if low_byte == 0xc8:
        info_string = 'noinline,default,nofree,NI'
    elif low_byte == 0xd0:
        info_string = 'noinline,default,nofree,EUI'
    else:
        # Generic decoding: allocator properties followed by one letter per
        # flag bit that is set.
        info_string = (
            f'{_cfstring_allocator_properties[(info >> 5) & 0x3]},'
            f'{"U" if info & 16 else ""}'
            f'{"N" if info & 8 else ""}'
            f'{"L" if info & 4 else ""}'
            f'{"I" if info & 1 else ""}')

    if 'U' in info_string:
        string = view.read(buffer, length * 2)
    else:
        string_ref = view.get_ascii_string_at(buffer, 0)
        if string_ref is None:
            log_debug('string returned None; how did we even get here?')
            return [DisassemblyTextLine(prefix, addr)]
        string = string_ref.value

    name = f'data_{addr:x}' if symbol is None else symbol.short_name

    tokens = [
        InstructionTextToken(InstructionTextTokenType.TypeNameToken, 'CFString'),
        InstructionTextToken(InstructionTextTokenType.TextToken, ' '),
        InstructionTextToken(InstructionTextTokenType.AnnotationToken, f'{{{info_string}}}'),
        InstructionTextToken(InstructionTextTokenType.TextToken, ' '),
        InstructionTextToken(InstructionTextTokenType.DataSymbolToken, name, addr),
        InstructionTextToken(InstructionTextTokenType.TextToken, ' = '),
        InstructionTextToken(InstructionTextTokenType.StringToken, f'{string!r}', buffer),
        InstructionTextToken(InstructionTextTokenType.TextToken, ' '),
    ]
    return [DisassemblyTextLine(tokens, addr)]
def define_type(node: Cursor, bv: bn.BinaryView):
    """Dispatch a libclang cursor to the handler that defines its type in ``bv``.

    The checks run in a fixed order: an already-defined type, a base type, an
    anonymous declaration, the kind of ``node.type`` and finally the kind of
    the cursor itself.

    :param node: libclang cursor of the declaration to process.
    :param bv: BinaryView in which the type is looked up and defined.
    :return: whatever the selected handler returns (the branches implemented
        here return a ``(name, type)`` pair); ``None`` when no handler applies.
    """
    bn.log.log_debug(f'define_type: Dispatch for "{node.type.spelling} {node.spelling}", CursorKind: {node.kind}, type '
                     f'{node.type.spelling}, TypeKind: {node.type.kind}')

    # For some reason libclang parses some typedefs (usually ENUM_DECL) as having no spelling, but doesn't
    # recognize them as anonymous.
    # BinaryNinja returns a type for the empty string ('') - which causes problems when trying to determine if
    # the type is already defined, so the lookup is skipped for nodes without a spelling.
    existing_type = bv.get_type_by_name(node.type.spelling) if node.spelling else None

    if isinstance(existing_type, bn.types.Type):
        # Type already defined: hand it back instead of redefining it.
        bn.log.log_debug(f'define_type: type {node.spelling} already defined, skipping re-definition.')
        return node.spelling, existing_type

    if check_if_base_type(node.type):
        parsed_type, parsed_name = bv.parse_type_string(f'{node.type.spelling} {node.spelling}')
        return str(parsed_name), parsed_type

    if node.is_anonymous():
        return define_anonymous_type(node, bv)

    # It is important to check for type kind before we check for cursor kind in order
    # to detect arrays and such.
    type_kind = node.type.kind
    if type_kind == TypeKind.ELABORATED:
        # Recurse on the declaration behind the elaborated type.
        return define_type(node.type.get_declaration(), bv)

    type_kind_handlers = {
        TypeKind.CONSTANTARRAY: constantarray_type,
        TypeKind.INCOMPLETEARRAY: incompletearray_type,
        TypeKind.FUNCTIONPROTO: functionproto_type,
        TypeKind.POINTER: pointer_type,
    }
    type_handler = type_kind_handlers.get(type_kind)
    if type_handler is not None:
        return type_handler(node, bv)

    cursor_kind = node.kind
    if cursor_kind == CursorKind.TYPEDEF_DECL:
        if type_kind == TypeKind.TYPEDEF:
            # Typedefs of function prototypes and pointers have dedicated handlers.
            underlying_kind = node.underlying_typedef_type.kind
            if underlying_kind == TypeKind.FUNCTIONPROTO:
                return function_decl(node, bv)
            if underlying_kind == TypeKind.POINTER:
                return pointer_type(node, bv)
        return typedef_decl(node, bv)

    if cursor_kind == CursorKind.PARM_DECL:
        if type_kind == TypeKind.TYPEDEF:
            return typedef_decl(node, bv)
        bn.log.log_debug(f'define_type: Unhandled case - node.kind {node.kind}, node.type.kind {node.type.kind}')
        return None

    cursor_kind_handlers = {
        CursorKind.VAR_DECL: var_decl,
        CursorKind.FUNCTION_DECL: function_decl,
        CursorKind.ENUM_DECL: enum_decl,
        CursorKind.STRUCT_DECL: struct_decl,
        CursorKind.FIELD_DECL: field_decl,
        # Unions go through the struct handler.
        CursorKind.UNION_DECL: struct_decl,
    }
    cursor_handler = cursor_kind_handlers.get(cursor_kind)
    if cursor_handler is None:
        bn.log.log_info(f'no handler for cursorKind {node.kind}')
        return None
    return cursor_handler(node, bv)
def constantarray_type(node: Cursor, bv: bn.BinaryView):
    """Define a fixed-size array declaration as a Binary Ninja array type.

    The element type is resolved by the first strategy that applies:

    1. it is already defined in ``bv`` under its spelling;
    2. its declaration is anonymous (struct/union/enum) and is defined through
       ``define_anonymous_type``;
    3. it is a base type and can be parsed directly from its spelling;
    4. it is a pointer: the pointee is parsed (base type) or defined through
       ``define_type`` and then wrapped in a pointer;
    5. it is itself a constant array (matrix, e.g. ``int a[3][4]``): the inner
       element is parsed or defined and wrapped in the inner array;
    6. otherwise its declaration is handed to ``define_type``.

    Only one level of pointer / array nesting is unwrapped here.

    :param node: libclang cursor whose ``type`` is a constant-size array.
    :param bv: BinaryView in which the array type is registered.
    :return: ``(node.spelling, array_type)``.
    """
    bn.log.log_debug(f'constantarray_type: {node.type.spelling} {node.spelling} \n'
                     f' node.kind: {node.kind}, node.type.kind: {node.type.kind}')
    element_type = node.type.get_array_element_type()
    bn.log.log_debug(f'constantarray_type: element_type: {element_type.spelling} \n'
                     f' element_type.kind: {element_type.kind}')
    array_size = node.type.get_array_size()

    bn_element_type = bv.get_type_by_name(element_type.spelling)
    # Compare against None explicitly instead of relying on truthiness, like the
    # `is None` / isinstance checks used by the sibling handlers: a valid type
    # object is not guaranteed to be truthy.
    # NOTE(review): bn Type appears to report its width via __len__, which would
    # make zero-width types falsy - confirm against the Binary Ninja API.
    if bn_element_type is not None:
        # Element type is already defined in the binaryView.
        bn.log.log_debug(f'constantarray_type: {element_type.spelling} already defined in the binaryView.')
    elif element_type.get_declaration().is_anonymous():
        # Anonymous struct/union/enum as the array member type.
        _, bn_anonymous_type = define_anonymous_type(element_type.get_declaration(), bv)
        bn.log.log_debug(f'constantarray_type: Successfully proccessed anonymous type: {bn_anonymous_type} .')
        bn_element_type = bn_anonymous_type
    elif check_if_base_type(element_type):
        # A base type won't appear in bv.get_type_by_name() but it is still defined.
        bn_element_type, _ = bv.parse_type_string(element_type.spelling)
    elif element_type.kind == TypeKind.POINTER:
        # The element is a pointer, so it won't have a declaration of its own.
        # Resolve the pointed-to type and wrap it in a Binary Ninja pointer.
        pointee_type = element_type.get_pointee()
        if check_if_base_type(pointee_type):
            bn_pointee_type, _ = bv.parse_type_string(pointee_type.spelling)
        else:
            _, bn_pointee_type = define_type(pointee_type.get_declaration(), bv)
        # The pointer wrapper is applied for non-base pointees too; otherwise the
        # result would be an array of the pointee rather than an array of pointers.
        bn_element_type = bn.Type.pointer(bv.arch, bn_pointee_type)
    elif element_type.kind == TypeKind.CONSTANTARRAY:
        # The element type is another constant array, meaning we are dealing with a matrix.
        inner_type = element_type.get_array_element_type()
        if check_if_base_type(inner_type):
            bn_inner_type, _ = bv.parse_type_string(inner_type.spelling)
        else:
            _, bn_inner_type = define_type(inner_type.get_declaration(), bv)
        # Keep the inner dimension for non-base element types as well.
        bn_element_type = bn.Type.array(bn_inner_type, element_type.get_array_size())
    else:
        # Not a libclang base type: define it normally in the binaryView.
        _, bn_element_type = define_type(element_type.get_declaration(), bv)

    array = bn.Type.array(bn_element_type, array_size)
    bv.define_user_type(node.spelling, array)
    bn.log.log_debug(f'constantarray_type: Successfully defined: {node.type.spelling} {node.spelling}')
    return node.spelling, array