Example #1
0
def import_ida(json_file, bv, options):
    """Apply function and string data exported to JSON onto a binary view.

    The JSON document must contain a ``"functions"`` mapping (name -> record
    with at least ``"start"``) and a ``"strings"`` mapping (address ->
    ``[name, length, type, data_refs]``).

    :param json_file: path of the JSON file to load, or ``None``
    :param bv: binary view that receives functions, comments and strings
    :param options: object exposing ``import_functions``, ``import_comments``,
        ``import_strings``, ``import_strings_names`` and ``verbose``
    :return: ``(True, None)`` on success, ``(False, message)`` when no file
        was given or the file could not be read/parsed
    """
    if json_file is None:
        return False, "No json file specified"

    try:
        # The context manager closes the handle even when parsing fails.
        with open(json_file, "rb") as f:
            imported = json.load(f)
    except Exception as e:
        return False, "Failed to parse json file {} {}".format(json_file, e)

    resolved_functions = imported["functions"]
    resolved_strings = imported["strings"]

    # TODO: import segments
    # TODO: Handle Conflicts

    if options.import_functions:
        log("Applying import data", options.verbose)
        for name, rec in resolved_functions.items():
            bv.add_function(rec["start"])
            func = bv.get_function_at(rec["start"])
            if func is None:
                # No function object is available at this address, so there
                # is nothing to rename or annotate.
                continue

            # Keep the tool-generated placeholder name when nothing better
            # was exported.
            if name != ("sub_%x" % rec["start"]):
                func.name = name

            if options.import_comments:
                if "comment" in rec:
                    func.comment = rec["comment"]

                if "comments" in rec:
                    for comment, addr in rec["comments"].items():
                        func.set_comment_at(addr, comment)

            if "can_return" in rec:
                func.can_return = rec["can_return"]

        bv.update_analysis_and_wait()

    if options.import_strings:
        log("Importing string types", options.verbose)
        for addr, (name, length, t, data_refs) in resolved_strings.items():
            bv.define_user_data_var(int(addr), Type.array(Type.int(1, None, "char"), length))
            if options.import_strings_names:
                bv.define_user_symbol(Symbol(SymbolType.DataSymbol, int(addr), name))
            for ref in data_refs:  # references to this data
                for block in bv.get_basic_blocks_at(ref):  # find any references in code
                    for i in block.get_disassembly_text():  # go through all the instructions in the block
                        if i.address != ref:
                            continue
                        for token in i.tokens:
                            if token.type == InstructionTextTokenType.PossibleAddressToken:
                                # Single-argument call form prints the same
                                # text on Python 2 and Python 3.
                                print("setting token {} {} {} {} {}".format(
                                    i.address, token.value, token.operand,
                                    IntegerDisplayType.PointerDisplayType, block.arch))
                                block.function.set_int_display_type(i.address, token.value, token.operand, IntegerDisplayType.PointerDisplayType, block.arch)
                                break

    log("Updating Analysis", options.verbose)
    bv.update_analysis_and_wait()
    return True, None
 def vtable_ty(vfunc_count):
     """Build the structure type describing a vtable with ``vfunc_count`` slots.

     The layout is ``top_offset``, ``typeinfo`` and a ``functions`` array of
     ``vfunc_count`` void pointers, in that order.
     """
     try:
         builder = StructureBuilder.create()
     except NameError:
         # StructureBuilder is not in scope; fall back to Structure.
         builder = Structure()

     layout = (
         (signed_int_ty, 'top_offset'),
         (base_type_info_ptr_ty, 'typeinfo'),
         (Type.array(void_p_ty, vfunc_count), 'functions'),
     )
     for member_type, member_name in layout:
         builder.append(member_type, member_name)

     return Type.structure_type(builder)
    def init(self):
        """Set up segments, vector-table symbols and the entry point.

        Asks the user for an MCU family, loads the matching platform
        description, maps RAM, peripheral and flash segments, names every
        vector-table entry and registers a function at each handler address.

        :return: always ``True``
        """
        selection = get_choice_input("Select MCU family", "MCU selection",
                                     self.MCUS)
        if selection is None:
            # No selection: use the description exported by the package itself.
            mcu = importlib.import_module("binaryninja_cortex.platforms").MCU
        else:
            module_name = "binaryninja_cortex.platforms." + self.MCUS[selection]
            mcu = importlib.import_module(module_name).Chip

        # RAM segment
        ram_flags = (SegmentFlag.SegmentReadable
                     | SegmentFlag.SegmentWritable
                     | SegmentFlag.SegmentExecutable)
        self.add_auto_segment(mcu.RAM_OFF, 0xffff, 0, 0, ram_flags)

        # Peripherals segment
        periph_flags = SegmentFlag.SegmentReadable | SegmentFlag.SegmentWritable
        self.add_auto_segment(mcu.PERIPH_OFF, 0x10000000, 0, 0, periph_flags)

        # Flash segment, assume flash < 2MB
        flash_flags = SegmentFlag.SegmentReadable | SegmentFlag.SegmentExecutable
        self.add_auto_segment(mcu.ROM_OFF, 0x200000, 0, 0x200000, flash_flags)

        # Vector table symbols.
        # Entry 0 (the initial stack pointer value) is a data pointer.
        sp_entry_name = mcu.IRQ[0]
        self.define_auto_symbol_and_var_or_function(
            Symbol(SymbolType.DataSymbol, mcu.ROM_OFF, sp_entry_name),
            Type.pointer(self.arch, Type.void(), const=True), self.platform)
        (initial_sp,) = struct.unpack("<I", self.parent_view.read(0, 4))
        self.define_auto_symbol(
            Symbol(SymbolType.DataSymbol, initial_sp,
                   "p_{}".format(sp_entry_name)))

        # All other vectors are function pointers.
        for index in range(1, len(mcu.IRQ)):
            vector_name = mcu.IRQ[index]
            entry_offset = 4 * index
            self.define_auto_symbol_and_var_or_function(
                Symbol(SymbolType.DataSymbol, mcu.ROM_OFF + entry_offset,
                       vector_name),
                Type.pointer(self.arch, Type.void(), const=True),
                self.platform)
            (raw_vector,) = struct.unpack(
                "<I", self.parent_view.read(entry_offset, 4))
            # Bit 0 is masked off before the value is used as an address.
            handler = raw_vector & ~1
            self.define_auto_symbol(
                Symbol(SymbolType.FunctionSymbol, handler,
                       "f_{}".format(vector_name)))
            self.add_function(handler, self.platform)

        # The entry point is the handler named f_RESET_IRQ.
        reset_handler = self.symbols['f_RESET_IRQ'].address
        self.add_entry_point(reset_handler, self.platform)

        return True
Example #4
0
    def init(self):
        """Describe the 8049 memory layout, vectors and register symbols.

        Creates a 128-byte ``.ram`` segment/section at address 0 and a
        ``.rom`` segment/section at ``CODE_OFFSET`` spanning the parent view,
        then names the reset/interrupt/timer vectors, both working-register
        banks and the stack register pairs.

        :return: ``True`` on success; ``False`` after logging the traceback
            when anything inside the setup raises
        """
        self.platform = Platform['8049_rb0mb0']

        rom_size = len(self.parent_view)

        try:
            code_base = self.CODE_OFFSET

            # Data memory: segment plus section.
            ram_flags = (SegmentFlag.SegmentContainsData
                         | SegmentFlag.SegmentReadable
                         | SegmentFlag.SegmentWritable)
            self.add_auto_segment(0, 128, 0, 0, ram_flags)
            self.add_auto_section(
                '.ram', 0, 128, SectionSemantics.ReadWriteDataSectionSemantics)

            # Program memory: segment, section and entry point.
            rom_flags = (SegmentFlag.SegmentContainsCode
                         | SegmentFlag.SegmentContainsData
                         | SegmentFlag.SegmentReadable
                         | SegmentFlag.SegmentExecutable)
            self.add_auto_segment(code_base, rom_size, 0, rom_size, rom_flags)
            self.add_auto_section(
                '.rom', code_base, rom_size,
                SectionSemantics.ReadOnlyCodeSectionSemantics)
            self.add_entry_point(code_base)

            # Fixed vectors inside program memory.
            vectors = ((0, 'reset'), (3, 'interrupt'), (7, 'timer'))
            for vector_offset, vector_name in vectors:
                self.define_auto_symbol_and_var_or_function(
                    Symbol(SymbolType.FunctionSymbol,
                           code_base | vector_offset, vector_name),
                    None, self.platform)

            # Working registers: R0-R7 at 0..7, R0'-R7' at 24..31.
            for n in range(8):
                banks = ((n, 'R{}'.format(n)), (n + 24, 'R{}\''.format(n)))
                for reg_address, reg_name in banks:
                    self.define_auto_symbol_and_var_or_function(
                        Symbol(SymbolType.DataSymbol, reg_address, reg_name),
                        Type.int(1, False), self.platform)

            # Stack registers: eight 2-byte entries starting at address 8.
            for n in range(8):
                self.define_auto_symbol_and_var_or_function(
                    Symbol(SymbolType.DataSymbol, 8 + 2 * n, 'S{}'.format(n)),
                    Type.int(2, False), self.platform)

            return True

        except:
            log_error(traceback.format_exc())
            return False
Example #5
0
	def _is_valid_for_data(self, ctxt, view, addr, type, context, ctxCount):
		"""Wrap the raw handles and forward to ``perform_is_valid_for_data``.

		The view, the type and the first ``ctxCount`` entries of ``context``
		are wrapped in their Python objects before the call. Any exception is
		logged with its traceback and reported as ``False``.
		"""
		try:
			metadata = FileMetadata(handle=core.BNGetFileForView(view))
			py_view = BinaryView(file_metadata=metadata, handle=core.BNNewViewReference(view))
			py_type = Type(handle=core.BNNewTypeReference(type))
			type_context = [
				Type(core.BNNewTypeReference(context[index]))
				for index in range(ctxCount)
			]
			return self.perform_is_valid_for_data(ctxt, py_view, addr, py_type, type_context)
		except:
			log_error(traceback.format_exc())
			return False
Example #6
0
    def _get_lines_for_data(self, ctxt, view, addr, type, prefix, prefixCount,
                            width, count, typeCtx, ctxCount):
        """Marshal ``perform_get_lines_for_data`` results into a core buffer.

        Wraps the incoming view/type handles, collects ``ctxCount`` context
        types from ``typeCtx``, calls ``self.perform_get_lines_for_data`` and
        copies each returned line into a ``core.BNDisassemblyTextLine`` array.
        The number of lines is written to ``count[0]``.

        :return: the buffer address as an integer, or ``None`` after logging
            the traceback when anything raises
        """
        try:
            # Wrap the raw handles in their Python-side objects.
            file_metadata = FileMetadata(handle=core.BNGetFileForView(view))
            view = BinaryView(file_metadata=file_metadata,
                              handle=core.BNNewViewReference(view))
            type = Type(handle=core.BNNewTypeReference(type))

            prefixTokens = InstructionTextToken.get_instruction_lines(
                prefix, prefixCount)
            pycontext = []
            for i in range(ctxCount):
                pycontext.append(Type(core.BNNewTypeReference(typeCtx[i])))

            result = self.perform_get_lines_for_data(ctxt, view, addr, type,
                                                     prefixTokens, width,
                                                     pycontext)

            # Report the line count through the out-parameter.
            count[0] = len(result)
            # NOTE(review): line_buf is a local and nothing visible here keeps
            # it referenced after return - confirm the caller consumes the
            # buffer before it can be garbage collected.
            line_buf = (core.BNDisassemblyTextLine * len(result))()
            for i in range(len(result)):
                line = result[i]
                color = line.highlight
                # Only the two highlight representations are accepted.
                if not isinstance(color,
                                  HighlightStandardColor) and not isinstance(
                                      color, highlight.HighlightColor):
                    raise ValueError(
                        "Specified color is not one of HighlightStandardColor, highlight.HighlightColor"
                    )
                # Normalize a standard color to a HighlightColor instance.
                if isinstance(color, HighlightStandardColor):
                    color = highlight.HighlightColor(color)
                line_buf[i].highlight = color._get_core_struct()
                # Without an explicit line address, use the first token's
                # address, or 0 when the line has no tokens.
                if line.address is None:
                    if len(line.tokens) > 0:
                        line_buf[i].addr = line.tokens[0].address
                    else:
                        line_buf[i].addr = 0
                else:
                    line_buf[i].addr = line.address
                if line.il_instruction is not None:
                    line_buf[i].instrIndex = line.il_instruction.instr_index
                else:
                    # All-ones value; presumably marks "no IL instruction".
                    line_buf[i].instrIndex = 0xffffffffffffffff

                line_buf[i].count = len(line.tokens)
                line_buf[
                    i].tokens = InstructionTextToken.get_instruction_lines(
                        line.tokens)

            # Hand back the buffer address as a plain integer.
            return ctypes.cast(line_buf, ctypes.c_void_p).value
        except:
            log_error(traceback.format_exc())
            return None
def process_msvc_func(func):
    """Set ``func``'s type from its demangled MSVC symbol name.

    Does nothing when the symbol does not demangle to a function type or when
    the demangler returns the name as a plain string. For ``__thiscall``
    members that are not static, a pointer to the owning class is inserted as
    the first parameter.
    """
    arch = func.arch
    raw_name = func.symbol.raw_name

    # Skip buggy vtables
    is_vtable_name = raw_name.startswith('??_7')
    if is_vtable_name and not raw_name.endswith('@@6B@'):
        return

    sym_type, sym_parts = demangle_ms(arch, raw_name)

    if sym_type is None:
        return
    if sym_type.type_class != TypeClass.FunctionTypeClass:
        return
    if isinstance(sym_parts, str):
        return

    # Void entries in the parameter list are dropped.
    params = [
        param.type for param in sym_type.parameters
        if param.type.type_class != TypeClass.VoidTypeClass
    ]
    return_type = sym_type.return_value

    prefix_tokens = [str(tok) for tok in sym_type.get_tokens_before_name()]

    # The first matching keyword wins; cdecl is the default.
    convention = 'cdecl'
    keyword_table = (
        ('__cdecl', 'cdecl'),
        ('__stdcall', 'stdcall'),
        ('__thiscall', 'thiscall'),
    )
    for keyword, convention_name in keyword_table:
        if keyword in prefix_tokens:
            convention = convention_name
            break

    is_member = convention == 'thiscall' and len(sym_parts) >= 2
    if is_member and 'static' not in prefix_tokens:
        owner_name = '::'.join(sym_parts[:-1])
        owner_ref = NamedTypeReference(
            NamedTypeReferenceClass.StructNamedTypeClass, name=owner_name)
        this_type = Type.pointer(arch, Type.named_type(owner_ref))
        params.insert(0, this_type)

    func.function_type = Type.function(return_type, params,
                                       arch.calling_conventions[convention],
                                       sym_type.has_variable_arguments)
Example #8
0
    def import_selected(self):
        """Import the rows currently selected in the types and objects tables.

        Selected types are defined as user types. For each selected object the
        first matching import symbol is retyped as a pointer to the object's
        type; objects without such a symbol are logged and skipped. Analysis
        is updated afterwards.
        """
        type_rows = {
            index.row() for index in self.types_table.selectedIndexes()
        }
        for row in type_rows:
            type_name, type_obj = self.types_table.model().types[row]
            self.view.define_user_type(type_name, type_obj)

        object_rows = {
            index.row() for index in self.objects_table.selectedIndexes()
        }
        for row in object_rows:
            name, type_ = self.objects_table.model().types[row]

            # Take the first symbol of an import kind, if there is one.
            symbol = None
            for candidate in self.view.get_symbols_by_name(str(name)):
                if candidate.type in (SymbolType.ImportAddressSymbol,
                                      SymbolType.ImportedDataSymbol):
                    symbol = candidate
                    break

            if symbol is None:
                log.log_warn(f"Could not find symbol `{name}` in the binary!")
                continue

            self.view.define_user_data_var(
                symbol.address, Type.pointer(self.view.arch, type_))

        self.view.update_analysis()
 def type_info_ty(kind=None):
     """Build the structure type for a type-info record.

     The structure always has ``vtable`` and ``name`` members; when ``kind``
     is ``'si_class'`` a ``base_type`` member is appended as well.

     :param kind: ``'si_class'`` to include the ``base_type`` member
     :return: the structure wrapped with ``Type.structure_type``
     """
     try:
         type_info_struct = StructureBuilder.create()
     except NameError:
         # StructureBuilder is not in scope (presumably an older API);
         # fall back to Structure as the sibling builders do.
         type_info_struct = Structure()
     type_info_struct.append(void_p_ty, 'vtable')
     type_info_struct.append(char_p_ty, 'name')
     if kind == 'si_class':
         type_info_struct.append(base_type_info_ptr_ty, 'base_type')
     return Type.structure_type(type_info_struct)
Example #10
0
def fix_mangled_symbols(thread, view):
    """Demangle leftover MSVC symbols, then type vtables, strings and functions.

    Walks ``view.symbols`` and then ``view.functions``; both loops stop as
    soon as ``thread.cancelled`` is set.
    """
    for sym in view.symbols.values():
        if thread.cancelled:
            break
        if not isinstance(sym, Symbol):
            continue

        mangled = sym.short_name
        if mangled.startswith('?') and not sym.raw_name.startswith('?'):
            demangled_type, demangled_name = demangle_ms(view.arch, mangled)
            if demangled_type is not None:
                # Rebuild the symbol with the demangled name and keep the
                # mangled form as its raw name.
                replacement = Symbol(
                    sym.type,
                    sym.address,
                    short_name=get_qualified_name(demangled_name),
                    full_name=get_qualified_name(demangled_name),
                    raw_name=mangled,
                    binding=sym.binding,
                    namespace=sym.namespace,
                    ordinal=sym.ordinal)

                view.undefine_user_symbol(sym)
                view.define_user_symbol(replacement)
                view.define_user_data_var(replacement.address, demangled_type)

                sym = replacement

        # Create vtables
        if 'vftable\'' in sym.full_name:
            create_vtable(view, None, sym.address)

        # Create strings
        if sym.raw_name.startswith('??_C@_'):
            view.undefine_user_symbol(sym)
            found = view.get_ascii_string_at(sym.address)

            # Only type the data when a string starts exactly at the symbol.
            if found is None or found.start != sym.address:
                continue
            view.define_user_data_var(
                sym.address, Type.array(Type.char(), found.length))

    for function in view.functions:
        if thread.cancelled:
            break
        process_msvc_func(function)
Example #11
0
def make_code(bv: BinaryView, start: int, end: int) -> None:
    """Define a byte-array data variable named ``CODE_<start>`` over a range.

    Nothing is done when a basic block already exists at ``start``. When the
    requested range is at most one byte long, ``end`` is recomputed as the
    nearest of: the next data variable (or the end of the view), the next
    basic block start, the end of the containing segment, and the end of any
    section containing ``start``.

    :param bv: binary view to modify
    :param start: first address of the range
    :param end: address one past the range; recomputed when ``end - start <= 1``
    """
    if bv.get_basic_blocks_at(start):
        return
    if end - start <= 1:
        # find the next basic block, data variable, or segment/section end
        data_var = bv.get_next_data_var_after(start)
        boundaries = [bv.end if data_var is None else data_var.address]
        boundaries.append(bv.get_next_basic_block_start_after(start))
        seg = bv.get_segment_at(start)
        if seg is not None:
            boundaries.append(seg.end)
        # No section may cover ``start``; collecting every candidate into one
        # list keeps min() valid in that case.
        boundaries.extend(s.end for s in bv.get_sections_at(start))
        end = min(boundaries)
    bv.define_data_var(start, Type.array(Type.int(1, False), end - start),
                       f"CODE_{start:08x}")
Example #12
0
def make_struct_here(view: BinaryView, address: int, length: int):
    """Create a structure type of ``length`` bytes and apply it at ``address``.

    The structure is named ``struct_<address in hex>``, filled in by
    ``add_members``, registered as a user type and applied as a user data
    variable, all inside one undo transaction.

    :param view: binary view to modify
    :param address: address the structure is applied at
    :param length: structure width in bytes
    """
    structure_name = f"struct_{address:x}"

    structure = StructureBuilder()
    structure.width = length

    add_members(view, structure, address, length)

    structure_type = Type.structure_type(structure)

    view.begin_undo_actions()
    try:
        view.define_user_type(structure_name, structure_type)

        named_type = Type.named_type_from_type(structure_name, structure_type)

        view.define_user_data_var(address, named_type)
    finally:
        # Always close the undo transaction, even if a definition fails, so
        # it is never left open.
        view.commit_undo_actions()
Example #13
0
def find_dynamically_linked_funcs(bv):
    """Name and type variables that receive dynamically looked-up symbols.

    Every function referencing one of the platform's ``sym_lookups`` targets
    is forced to be analysed. For each lookup call whose second parameter is
    a constant pointing at a non-empty string, the string is typed as a byte
    array and the call's first output variable is renamed ``<symbol>@DYN``
    with a pointer-to-function type; the name is then propagated.

    :param bv: binary view to inspect and annotate
    """
    platform_info = get_platform_info(bv)
    lookups = platform_info["sym_lookups"]

    funcs_to_check = set()
    for lookup in lookups:
        for ref in bv.get_code_refs(lookup):
            ref.function.analysis_skip_override = FunctionAnalysisSkipOverride.NeverSkipFunctionAnalysis
            funcs_to_check.add(ref.function)

    # NOTE(review): a fixed one-second sleep does not guarantee the requested
    # analysis has finished - confirm whether a blocking wait is safe here.
    bv.update_analysis()
    time.sleep(1)

    constant_kinds = (
        RegisterValueType.ConstantPointerValue,
        RegisterValueType.ConstantValue,
    )

    for f in funcs_to_check:
        mlil_ssa = f.medium_level_il.ssa_form

        for call in find_mlil_calls_to_targets(mlil_ssa, lookups):
            if len(call.params) < 2 or len(call.output.vars_written) < 1:
                continue

            symbol_name_addr = call.params[1].value
            if symbol_name_addr.type not in constant_kinds:
                continue

            # The constant may not point at a string at all.
            string_ref = bv.get_ascii_string_at(symbol_name_addr.value)
            if string_ref is None:
                continue
            symbol_name = string_ref.value
            if not symbol_name:
                continue

            output_var = call.output.vars_written[0]
            # TODO (carried over): add confidence of zero to both the
            # arguments and the return type.
            symbol_type = Type.pointer(bv.arch,
                                       bv.parse_type_string("void foo()")[0])

            bv.define_user_data_var(symbol_name_addr.value,
                                    Type.array(Type.int(1), len(symbol_name)))

            output_name = symbol_name + "@DYN"
            f.create_user_var(output_var.var, symbol_type, output_name)
            propagate_var_name(f, mlil_ssa, output_var, output_name,
                               symbol_type)
 def type_info_ty(kind=None):
     """Build the structure type for a type-info record.

     Members are ``vtable`` and ``name``, followed by ``base_type`` when
     ``kind`` equals ``'si_class'``.
     """
     try:
         builder = StructureBuilder.create()
     except NameError:
         # StructureBuilder is not in scope; fall back to Structure.
         builder = Structure()

     members = [(void_p_ty, 'vtable'), (char_p_ty, 'name')]
     if kind == 'si_class':
         members.append((base_type_info_ptr_ty, 'base_type'))

     for member_type, member_name in members:
         builder.append(member_type, member_name)

     return Type.structure_type(builder)
Example #15
0
def set_symbol_type(bv, sym, type):
	"""Re-type the function or data variable located at ``sym.address``.

	A function at that address gets ``type`` as its user type. Otherwise an
	existing data variable is undefined and redefined as a pointer to
	``type``. Nothing happens when neither exists.
	"""
	address = sym.address
	func = bv.get_function_at(address)
	if not func:
		dvar = bv.get_data_var_at(address)
		if dvar:
			bv.undefine_data_var(dvar.address)
			pointer_type = Type.pointer(bv.arch, type)
			bv.define_user_data_var(dvar.address, pointer_type)
	else:
		func.set_user_type(type)
Example #16
0
    def _check_and_prop_types_on_call(self, instr: HighLevelILInstruction):
        """Most UEFI modules don't assign globals in the entry function and instead call an initialization routine and
        pass the system table to it where global assignments are made. This function ensures that the types are applied
        to the initialization function params so that we can catch global assignments outside of the module entry

        Only direct calls (constant-pointer destination) that pass an argument whose text mentions ``ImageHandle``,
        ``SystemTable``, ``FileHandle`` or ``PeiServices`` are processed.

        :param instr: High level IL instruction object
        """

        if instr.operation not in (HighLevelILOperation.HLIL_TAILCALL,
                                   HighLevelILOperation.HLIL_CALL):
            return

        if instr.dest.operation != HighLevelILOperation.HLIL_CONST_PTR:
            return

        # Each name must be tested against the argument text; a bare string
        # literal in an ``or`` chain is always truthy and would match every
        # call.
        handle_names = ('ImageHandle', 'SystemTable', 'FileHandle',
                        'PeiServices')
        argv_is_passed = any(
            any(handle_name in arg_text for handle_name in handle_names)
            for arg_text in map(str, instr.params))

        if not argv_is_passed:
            return

        func = self.bv.get_function_at(instr.dest.constant)
        if func is None:
            # The call target is not a known function; nothing to retype.
            return

        old = func.function_type
        call_args = instr.params
        new_params = []
        for arg, param in zip(call_args, old.parameters):
            # Prefer the type of the variable passed at the call site.
            if hasattr(arg, 'var'):
                new_type = arg.var.type
            else:
                new_type = param.type
            # NOTE(review): 256 looks one above an 8-bit confidence maximum -
            # confirm the intended value.
            new_type.confidence = 256
            new_params.append(FunctionParameter(new_type, param.name))

        # TODO: this is a hack to account for odd behavior. func.function_type should be able to set directly to
        # Type.Function(...). However, during testing this isn't the case. I am only able to get it to work if I
        # set function_type to a string and update analysis.
        prototype = str(
            Type.function(old.return_value, new_params, old.calling_convention,
                          old.has_variable_arguments, old.stack_adjustment))
        # Insert the function name before the first parenthesis only, so
        # parentheses inside parameter types are left untouched.
        gross_hack = prototype.replace('(', '{}('.format(func.name), 1)
        try:
            func.function_type = gross_hack
            self.bv.update_analysis_and_wait()
        except SyntaxError:
            pass  # BN can't parse int48_t and other types despite that it uses it. Ran into this from a sidt instruction
Example #17
0
def load_svd(bv, svd_file=None):
    """Load an SVD file and map its peripherals into the binary view.

    Each peripheral becomes a user section, a read/write data segment, a data
    variable of the peripheral's structure type and an imported-data symbol.
    Peripherals without a ``'derives'`` key define a new structure type from
    their registers; derived peripherals reuse the type of the peripheral
    they derive from.

    :param bv: binary view to modify
    :param svd_file: path of the SVD file; when falsy the user is prompted
    """
    if not svd_file:
        svd_file = get_open_filename_input("SVD File")
    if not svd_file:
        # The prompt returned nothing (e.g. it was cancelled); os.access
        # would raise on a missing path.
        return
    if isinstance(svd_file, str):
        svd_file = bytes(svd_file, encoding="utf-8")
    if not os.access(svd_file, os.R_OK):
        log_error(f"SVD Browser: Unable to open {svd_file}")
        return
    log_info(f"SVD Loader: Loading {svd_file}")
    device = parse(svd_file)
    peripherals = device['peripherals'].values()
    base_peripherals = [p for p in peripherals if 'derives' not in p]
    derived_peripherals = [p for p in peripherals if 'derives' in p]

    def register_peripheral(p, struct_type):
        """Create the section, segment, data variable and symbol for ``p``."""
        bv.add_user_section(p['name'], p['base'], p['size'],
                            SectionSemantics.ReadWriteDataSectionSemantics)
        bv.add_user_segment(
            p['base'], p['size'], 0, 0, SegmentFlag.SegmentContainsData
            | SegmentFlag.SegmentReadable | SegmentFlag.SegmentWritable)
        bv.define_data_var(p['base'], struct_type)
        bv.define_user_symbol(
            Symbol(SymbolType.ImportedDataSymbol, p['base'], p['name']))

    for p in base_peripherals:
        s = Structure()
        for r in p['registers'].values():
            # Register size is divided by 8 to get a byte width; registers
            # without a size are treated as 4 bytes wide.
            reg_bits = r['size']
            reg_bytes = 4 if reg_bits is None else reg_bits // 8
            s.insert(r['offset'], Type.int(reg_bytes, False), r['name'])
        struct_type = Type.structure_type(s)
        bv.define_user_type(p['name'], struct_type)
        register_peripheral(p, struct_type)

    for p in derived_peripherals:
        # Reuse the type registered under the parent peripheral's name.
        struct_type = bv.get_type_by_name(
            device['peripherals'][p['derives']]['name'])
        register_peripheral(p, struct_type)
Example #18
0
def load_symbols(thread, view, symbols):
    """Define a symbol (and function or data variable) for each entry.

    ``symbols`` yields ``(name, addr)`` pairs. Entries at invalid offsets are
    skipped, as are addresses already holding a symbol that is not auto or
    that is neither a data nor a function symbol. Names are demangled where
    possible. ``fix_mangled_symbols`` is run once at the end.
    """
    arch = view.arch

    for name, addr in symbols:
        if not view.is_valid_offset(addr):
            continue

        existing = view.get_symbol_at(addr)
        if existing is not None:
            if not existing.auto:
                continue
            if existing.type not in [SymbolType.DataSymbol,
                                     SymbolType.FunctionSymbol]:
                continue
            view.undefine_user_symbol(existing)

        sym_type, sym_parts = demangle_ms(arch, name)
        if sym_type is None:
            sym_type = Type.void()

        # A plain string result is used as a single-part name.
        if isinstance(sym_parts, str):
            sym_parts = [sym_parts]
        sym_name = '::'.join(sym_parts)

        if '`vftable\'' in sym_name:
            sym_type = Type.void()

        if view.is_offset_executable(addr):
            view.create_user_function(addr)
            symbol_kind = SymbolType.FunctionSymbol
        else:
            view.define_data_var(addr, sym_type)
            symbol_kind = SymbolType.DataSymbol
        view.define_user_symbol(
            Symbol(symbol_kind, addr, sym_name, raw_name=name))

    fix_mangled_symbols(thread, view)
Example #19
0
 def get_symbol_type(self, sym):
     """Return the type currently applied at ``sym.address``, if any.

     Yields a function type rebuilt from the function found at that address,
     else the type of the data variable there. Returns ``None`` when the view
     has no database or nothing is defined at the address.
     """
     if not self.bv.has_database:
         return None

     func = self.bv.get_function_at(sym.address)
     if not func:
         dvar = self.bv.get_data_var_at(sym.address)
         return dvar.type if dvar else None

     return_type = func.return_type
     # Each parameter variable is also passed as its own location.
     params = [
         FunctionParameter(var.type, var.name, location=var)
         for var in func.parameter_vars
     ]
     return Type.function(
         return_type, params,
         calling_convention=func.calling_convention,
         variable_arguments=func.has_variable_arguments,
         stack_adjust=func.stack_adjustment)
Example #20
0
    def import_all(self):
        """Import every named type and named object from ``self.lib``.

        The library is added to the view, all of its named types become user
        types, and each named object with a matching import symbol is retyped
        as a pointer to the object's type. Objects without such a symbol are
        skipped silently. Analysis is updated afterwards.
        """
        self.view.add_type_library(self.lib)

        for type_name, type_obj in self.lib.named_types.items():
            self.view.define_user_type(type_name, type_obj)

        for name, type_ in self.lib.named_objects.items():
            # Stop at the first symbol of an import kind; the else branch
            # runs only when no such symbol exists.
            for candidate in self.view.get_symbols_by_name(str(name)):
                if candidate.type in (SymbolType.ImportAddressSymbol,
                                      SymbolType.ImportedDataSymbol):
                    break
            else:
                continue

            self.view.define_user_data_var(
                candidate.address, Type.pointer(self.view.arch, type_))

        self.view.update_analysis()
Example #21
0
import binaryninja
from binaryninja.enums import NamedTypeReferenceClass
from binaryninja.types import Type, NamedTypeReferenceType, StructureBuilder, EnumerationBuilder

# Create the type library for x86_64 and register its platform and
# alternate name.
arch = binaryninja.Architecture['x86_64']
typelib = binaryninja.typelibrary.TypeLibrary.new(arch, 'libtest.so.1')
typelib.add_platform(binaryninja.Platform['mac-x86_64'])
typelib.add_alternate_name('libtest.so')

#------------------------------------------------------------------------------
# PART1: Named Types
#------------------------------------------------------------------------------

# One entry per example, added to the library in this order.
_named_type_examples = [
    # example: VoidTypeClass
    ('MyVoidType', Type.void()),
    # example: BoolTypeClass
    ('MyBoolType', Type.bool()),
    # example: IntegerTypeClass
    ('MyCharType', Type.char()),
    ('MyIntType', Type.int(4, True)),
    ('MyUnsignedIntType', Type.int(4, False)),
    # example: FloatTypeClass
    ('MyFloatType', Type.float(4)),
    # example: PointerTypeClass
    # char *
    ('MyPointerType', Type.pointer(arch, Type.char())),
]
for _example_name, _example_type in _named_type_examples:
    typelib.add_named_type(_example_name, _example_type)
Example #22
0
class Intel8086(Architecture):
    """Binary Ninja architecture description for the Intel 8086.

    The class attributes are declarative tables (registers, flags, flag
    write types, flag conditions, intrinsics) read by the ``Architecture``
    base class.  The methods delegate decoding, rendering, lifting and
    re-encoding of instructions to the ``mc`` module.
    """

    name = "8086"
    endianness = Endianness.LittleEndian

    default_int_size = 2
    address_size = 3

    stack_pointer = 'sp'
    # RegisterInfo arguments are (full-width register, size in bytes,
    # byte offset inside the full-width register).
    regs = {
        # General
        'ax': RegisterInfo('ax', 2, 0),
        'al': RegisterInfo('ax', 1, 0),
        'ah': RegisterInfo('ax', 1, 1),
        'cx': RegisterInfo('cx', 2, 0),
        'cl': RegisterInfo('cx', 1, 0),
        'ch': RegisterInfo('cx', 1, 1),
        'bx': RegisterInfo('bx', 2, 0),
        'bl': RegisterInfo('bx', 1, 0),
        'bh': RegisterInfo('bx', 1, 1),
        'dx': RegisterInfo('dx', 2, 0),
        'dl': RegisterInfo('dx', 1, 0),
        'dh': RegisterInfo('dx', 1, 1),
        'sp': RegisterInfo('sp', 2),
        'bp': RegisterInfo('bp', 2),
        'si': RegisterInfo('si', 2),
        'di': RegisterInfo('di', 2),
        # Segment
        'cs': RegisterInfo('cs', 2),
        'ds': RegisterInfo('ds', 2),
        'es': RegisterInfo('es', 2),
        'ss': RegisterInfo('ss', 2),
        # Instruction pointer
        'ip': RegisterInfo('ip', 2)
    }
    flags = [
        # Status
        'c',  # carry
        'p',  # parity
        'a',  # aux carry
        'z',  # zero
        's',  # sign
        'o',  # overflow
        # Control
        'i',  # interrupt
        'd',  # direction
        't',  # trap
    ]
    flag_roles = {
        'c': FlagRole.CarryFlagRole,
        'p': FlagRole.OddParityFlagRole,
        'a': FlagRole.HalfCarryFlagRole,
        'z': FlagRole.ZeroFlagRole,
        's': FlagRole.NegativeSignFlagRole,
        't': FlagRole.SpecialFlagRole,
        'i': FlagRole.SpecialFlagRole,
        'd': FlagRole.SpecialFlagRole,
        'o': FlagRole.OverflowFlagRole,
    }
    # Names of the flag-write classes; the mapping below lists which flags
    # each non-empty class writes.
    flag_write_types = [
        '',
        '*',
        '!c',
        'co',
    ]
    flags_written_by_flag_write_type = {
        '*': ['c', 'p', 'a', 'z', 's', 'o'],
        '!c': ['p', 'a', 'z', 's', 'o'],
        'co': ['c', 'o'],
    }
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        LowLevelILFlagCondition.LLFC_SLT: ['s', 'o'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_SLE: ['z', 's', 'o'],
        LowLevelILFlagCondition.LLFC_ULE: ['c', 'z'],
        LowLevelILFlagCondition.LLFC_SGE: ['s', 'o'],
        LowLevelILFlagCondition.LLFC_UGE: ['c'],
        LowLevelILFlagCondition.LLFC_SGT: ['z', 's', 'o'],
        LowLevelILFlagCondition.LLFC_UGT: ['c', 'z'],
        LowLevelILFlagCondition.LLFC_NEG: ['s'],
        LowLevelILFlagCondition.LLFC_POS: ['s'],
        LowLevelILFlagCondition.LLFC_O: ['o'],
        LowLevelILFlagCondition.LLFC_NO: ['o'],
    }

    # Port I/O intrinsics, declared as IntrinsicInfo(inputs, outputs).
    # NOTE(review): 'inb' declares a 1-byte input and a 2-byte output, the
    # reverse of what 'outb' suggests (2-byte port, 1-byte value) -- confirm
    # against the lifter before changing.
    intrinsics = {
        'outb': IntrinsicInfo([Type.int(2), Type.int(1)], []),
        'outw': IntrinsicInfo([Type.int(2), Type.int(2)], []),
        'inb': IntrinsicInfo([Type.int(1)], [Type.int(2)]),
        'inw': IntrinsicInfo([Type.int(2)], [Type.int(2)]),
    }

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` for the instruction at ``addr``.

        Returns ``None`` when ``mc.decode`` cannot decode ``data``.
        """
        decoded = mc.decode(data, addr)
        if not decoded:
            return None
        info = InstructionInfo()
        decoded.analyze(info, addr)
        return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at ``addr``.

        As a self-check the decoded instruction is re-encoded and compared
        with the original bytes; a mismatch is only logged, the rendering
        is still returned.  Returns ``None`` when decoding fails.
        """
        decoded = mc.decode(data, addr)
        if not decoded:
            return None

        encoded = data[:decoded.total_length()]
        recoded = mc.encode(decoded, addr)
        if encoded != recoded:
            log_error("Instruction roundtrip error")
            log_error("".join([str(x) for x in decoded.render(addr)]))
            log_error("Orig: {}".format(encoded.hex()))
            log_error("New:  {}".format(recoded.hex()))

        return decoded.render(addr), decoded.total_length()

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction at ``addr`` into ``il`` and return its length.

        Returns ``None`` when decoding fails.
        """
        decoded = mc.decode(data, addr)
        if not decoded:
            return None
        decoded.lift(il, addr)
        return decoded.total_length()

    def convert_to_nop(self, data, addr):
        """Return ``len(data)`` bytes of 0x90, replacing ``data`` one for one."""
        return b'\x90' * len(data)

    def is_always_branch_patch_available(self, data, addr):
        """Return True iff ``data`` decodes to a conditional jump."""
        decoded = mc.decode(data, addr)
        # Always answer with a real bool, also when decoding fails.
        return bool(decoded) and isinstance(decoded, mc.instr.jmp.JmpCond)

    def always_branch(self, data, addr):
        """Return the encoding of the conditional jump made unconditional.

        Returns ``None`` when ``data`` does not decode to a conditional
        jump, instead of failing on a missing ``to_always`` attribute.
        """
        branch = mc.decode(data, addr)
        if not branch or not isinstance(branch, mc.instr.jmp.JmpCond):
            return None
        return mc.encode(branch.to_always(), addr)

    def is_invert_branch_patch_available(self, data, addr):
        """Return True iff ``data`` decodes to a conditional jump."""
        decoded = mc.decode(data, addr)
        # Always answer with a real bool, also when decoding fails.
        return bool(decoded) and isinstance(decoded, mc.instr.jmp.JmpCond)

    def invert_branch(self, data, addr):
        """Return the encoding of the conditional jump with inverted condition.

        Returns ``None`` when ``data`` does not decode to a conditional
        jump, instead of failing on a missing ``to_inverted`` attribute.
        """
        branch = mc.decode(data, addr)
        if not branch or not isinstance(branch, mc.instr.jmp.JmpCond):
            return None
        return mc.encode(branch.to_inverted(), addr)
Example #23
0
    def init(self):
        """Build the view from the loaded image.

        Steps, in order:

        1. create the ``.text``, ``.rodata`` and ``.data`` segments;
        2. if the ``MOD0`` magic is present, parse the MOD0 header, the
           dynamic table, the dynamic symbol table and the relocations, and
           create the matching sections (``.dynamic``, ``.dynstr``,
           ``.dynsym``, ``.got.plt``, ``.plt``, ``.got``, ...);
        3. create the ``.bss`` segment;
        4. define symbols / functions / data variables for the dynamic
           symbols and apply the relocations by writing the resolved values;
        5. name PLT stubs and register the entry point.

        Always returns ``True``.

        NOTE(review): ``offset_size`` and ``plt_got_start`` are assigned only
        inside the MOD0 branch but are used after it; ``self.syms`` is also
        assigned only there.  Unless these are initialised elsewhere, an
        image with an invalid MOD0 magic fails in the code after the branch
        -- confirm.
        """
        self.log(f'Loading {self.name} {self.app_name}')

        self.raw = b''
        self.platform = Architecture[self.ARCH].standalone_platform

        # The 32-bit value at HDR_SIZE + 4 is used as the offset of the MOD0
        # header relative to the end of the file header.
        self.reader.seek(self.HDR_SIZE + 4)
        mod_offset = self.reader.read32()
        mod_file_offset = self.HDR_SIZE + mod_offset

        # `offset` is advanced by each segment's size, i.e. the three
        # segments are taken to follow each other right after the header.
        offset = self.HDR_SIZE
        self.make_segment('.text', self.base + self.text_offset, offset,
                          self.text_size)
        offset += self.text_size

        self.make_segment('.rodata', self.base + self.rodata_offset, offset,
                          self.rodata_size)
        offset += self.rodata_size

        self.make_segment('.data', self.base + self.data_offset, offset,
                          self.data_size)
        offset += self.data_size

        self.reader.seek(mod_file_offset)
        if self.reader.read(4) != b'MOD0':
            self.log(f'MOD0(@ {hex(mod_offset)}) Magic invalid')
        else:
            self.log('Parsing MOD0')
            # Every MOD0 field read below is a signed 32-bit value that is
            # added to mod_offset.
            self.dynamic_offset = mod_offset + self.up_signed(
                self.reader.read(4), 4)
            dynamic_file_offset = self.HDR_SIZE + self.dynamic_offset
            # NOTE(review): the two reads below are conditional, so when
            # bss_offset is already non-zero but bss_size is zero, the
            # second read consumes the field the first one skipped --
            # confirm this combination cannot occur.
            if self.bss_offset == 0:
                self.bss_offset = mod_offset + self.up_signed(
                    self.reader.read(4), 4)

            # The dynamic table is assumed to extend up to the start of bss.
            dynamic_size = self.bss_offset - self.dynamic_offset
            if self.bss_size == 0:
                bss_end = mod_offset + self.up_signed(self.reader.read(4), 4)
                self.bss_size = bss_end - self.bss_offset

            # Re-synchronise on a fixed position regardless of which of the
            # conditional reads above were executed.
            self.reader.seek(mod_file_offset + 0x10)
            self.eh_frame_hdr_start = mod_offset + self.up_signed(
                self.reader.read(4), 4)
            eh_frame_hdr_end = mod_offset + self.up_signed(
                self.reader.read(4), 4)
            self.eh_frame_hdr_size = eh_frame_hdr_end - self.eh_frame_hdr_start
            self.module_offset = mod_offset + self.up_signed(
                self.reader.read(4), 4)

            # An 'LNY0' magic right after the MOD0 fields is followed by
            # explicit .got bounds; `libnx` later disables the .got
            # heuristic.
            libnx = False
            if self.reader.read(4) == b'LNY0':
                libnx = True
                libnx_got_start = mod_offset + self.up_signed(
                    self.reader.read(4), 4)
                libnx_got_end = mod_offset + self.up_signed(
                    self.reader.read(4), 4)
                self.make_section('.got', self.base + libnx_got_start,
                                  libnx_got_end - libnx_got_start)

            # Word-size detection: read the tags of what would be the first
            # two 16-byte entries as 64-bit values; if either does not fit
            # in 32 bits the table is treated as the 32-bit (armv7) layout.
            self.reader.seek(dynamic_file_offset)
            tag1 = self.reader.read64()
            self.reader.seek(dynamic_file_offset + 0x10)
            tag2 = self.reader.read64()
            self.reader.seek(dynamic_file_offset)
            self.armv7 = tag1 > 0xFFFFFFFF or tag2 > 0xFFFFFFFF
            offset_size = 4 if self.armv7 else 8
            self.reader.seek(dynamic_file_offset)
            # Tags listed in MULTIPLE_DTS collect all their values in a
            # list; every other tag keeps only the last value seen.
            self.dynamic = {x: [] for x in MULTIPLE_DTS}
            # The entry count assumes 16-byte entries also for the armv7
            # layout (whose entries are read as 2 x 32 bit); the loop stops
            # at DT_NULL anyway.
            for index in range(dynamic_size // 0x10):
                if self.armv7:
                    tag = self.reader.read32()
                    val = self.reader.read32()
                else:
                    tag = self.reader.read64()
                    val = self.reader.read64()

                if tag == DT_NULL:
                    break

                if tag in MULTIPLE_DTS:
                    self.dynamic[tag].append(val)
                else:
                    self.dynamic[tag] = val
            self.make_section('.dynamic', self.base + self.dynamic_offset,
                              dynamic_size)

            if DT_STRTAB in self.dynamic and DT_STRSZ in self.dynamic:
                self.log("Reading .dynstr")
                self.reader.seek(self.HDR_SIZE + self.dynamic[DT_STRTAB])
                self.dynstr = self.reader.read(self.dynamic[DT_STRSZ])

            # Create a section for every (address tag, size tag) pair that
            # is present in the dynamic table.
            for start_key, size_key, name in [
                (DT_STRTAB, DT_STRSZ, '.dynstr'),
                (DT_INIT_ARRAY, DT_INIT_ARRAYSZ, '.init_array'),
                (DT_FINI_ARRAY, DT_FINI_ARRAYSZ, '.fini_array'),
                (DT_RELA, DT_RELASZ, '.rela.dyn'),
                (DT_REL, DT_RELSZ, '.rel.dyn'),
                (DT_JMPREL, DT_PLTRELSZ,
                 ('.rel.plt' if self.armv7 else '.rela.plt')),
            ]:
                if start_key in self.dynamic and size_key in self.dynamic:
                    self.make_section(name,
                                      self.base + self.dynamic[start_key],
                                      self.dynamic[size_key])

            # NOTE(review): `needed` is not used again in this method, and
            # the lookup presumably relies on DT_NEEDED being one of
            # MULTIPLE_DTS (otherwise it can raise KeyError) -- confirm.
            needed = [self.get_dynstr(i) for i in self.dynamic[DT_NEEDED]]

            self.syms = [
            ]  # symbols, symbols is already an attribute for BinaryView
            if DT_SYMTAB in self.dynamic and DT_STRTAB in self.dynamic:
                self.reader.seek(self.HDR_SIZE + self.dynamic[DT_SYMTAB])
                # No symbol count is available here, so entries are read
                # until one of the two stop conditions below triggers.
                while True:
                    # Stop 1: the symbol table precedes the string table
                    # and the reader has reached the string table.
                    if self.dynamic[DT_SYMTAB] < self.dynamic[
                            DT_STRTAB] and self.reader.offset - self.HDR_SIZE >= self.dynamic[
                                DT_STRTAB]:
                        break

                    # The field order differs between the two layouts.
                    if self.armv7:
                        st_name = self.reader.read32()
                        st_value = self.reader.read32()
                        st_size = self.reader.read32()
                        st_info = self.reader.read8()
                        st_other = self.reader.read8()
                        st_shndx = self.reader.read16()
                    else:
                        st_name = self.reader.read32()
                        st_info = self.reader.read8()
                        st_other = self.reader.read8()
                        st_shndx = self.reader.read16()
                        st_value = self.reader.read64()
                        st_size = self.reader.read64()

                    # Stop 2: the name offset lies beyond the string table.
                    # NOTE(review): self.dynstr is only assigned above when
                    # DT_STRSZ is present -- confirm it has a default.
                    if st_name > len(self.dynstr):
                        break

                    self.syms.append(
                        ElfSym(self.get_dynstr(st_name), st_info, st_other,
                               st_shndx, st_value, st_size))
                # The section size is derived from where the reader stopped.
                self.make_section('.dynsym',
                                  self.base + self.dynamic[DT_SYMTAB],
                                  (self.reader.offset - self.HDR_SIZE) -
                                  self.dynamic[DT_SYMTAB])

            # `locations` accumulates the results of process_relocations
            # for all three relocation tables.
            locations = set()
            plt_got_end = None
            if DT_REL in self.dynamic and DT_RELSZ in self.dynamic:
                locations |= self.process_relocations(self.dynamic[DT_REL],
                                                      self.dynamic[DT_RELSZ])

            if DT_RELA in self.dynamic and DT_RELASZ in self.dynamic:
                locations |= self.process_relocations(self.dynamic[DT_RELA],
                                                      self.dynamic[DT_RELASZ])

            if DT_JMPREL in self.dynamic and DT_PLTRELSZ in self.dynamic:
                plt_locations = self.process_relocations(
                    self.dynamic[DT_JMPREL], self.dynamic[DT_PLTRELSZ])
                locations |= plt_locations

                # NOTE(review): min()/max() raise ValueError if
                # process_relocations returned an empty set.
                plt_got_start = min(plt_locations)
                plt_got_end = max(plt_locations) + offset_size
                if DT_PLTGOT in self.dynamic:
                    self.make_section('.got.plt',
                                      self.base + self.dynamic[DT_PLTGOT],
                                      plt_got_end - plt_got_start)

                if not self.armv7:
                    # Scan .text for PLT stubs: look for the word 0xD61F0220
                    # at 4-byte aligned positions, check the words 12 and 8
                    # bytes before it against two masks, compute the address
                    # the stub loads from and keep the stub if that address
                    # lies in the PLT GOT range.
                    # NOTE(review): the constants presumably encode the
                    # AArch64 `adrp x16` / `ldr x17, [x16, #imm]` / `br x17`
                    # sequence -- confirm against the loader linked below.
                    self.reader.seek(self.HDR_SIZE)
                    text = self.reader.read(self.text_size)
                    last = 12
                    while True:  # This block was straight copy pasted from https://github.com/reswitched/loaders/blob/30a2f1f1d6c997a46cc4225c1f443c19d21fc66c/nxo64.py#L406
                        pos = text.find(pack('<I', 0xD61F0220), last)
                        if pos == -1: break
                        last = pos + 1
                        if (pos % 4) != 0: continue
                        off = pos - 12
                        a, b, c, d = unpack_from('<IIII', text, off)
                        if d == 0xD61F0220 and (
                                a & 0x9f00001f) == 0x90000010 and (
                                    b & 0xffe003ff) == 0xf9400211:
                            base = off & ~0xFFF
                            immhi = (a >> 5) & 0x7ffff
                            immlo = (a >> 29) & 3
                            paddr = base + ((immlo << 12) | (immhi << 14))
                            poff = ((b >> 10) & 0xfff) << 3
                            target = paddr + poff
                            if plt_got_start <= target < plt_got_end:
                                self.plt_entries.append((off, target))
                    # Drop the reference to the copy of .text.
                    text = b''
                    # NOTE(review): min()/max() raise ValueError when no
                    # stub was found and self.plt_entries is empty.
                    plt_start = min(self.plt_entries)[0]
                    plt_end = max(self.plt_entries)[0] + 0x10
                    self.make_section('.plt', self.base + plt_start,
                                      plt_end - plt_start)

                if not libnx:
                    if plt_got_end is not None:
                        # Heuristic .got: starting one slot after the PLT
                        # GOT, extend slot by slot while the slot address
                        # is a relocation location and lies below
                        # DT_INIT_ARRAY (when that tag is present).
                        got_ok = False
                        got_end = plt_got_end + offset_size
                        while got_end in locations and (
                                DT_INIT_ARRAY not in self.dynamic
                                or got_end < self.dynamic[DT_INIT_ARRAY]):
                            got_ok = True
                            got_end += offset_size

                        if got_ok:
                            self.make_section('.got', self.base + plt_got_end,
                                              got_end - plt_got_end)
            else:
                # No PLT relocations: use an empty PLT GOT range.
                plt_got_start = 0
                plt_got_end = 0

        # NOTE(review): the next statement assigns the attribute to itself.
        self.bss_offset = self.bss_offset
        self.bss_size = self.page_align_up(self.bss_size)
        self.make_segment('.bss',
                          self.base + self.bss_offset,
                          0,
                          self.bss_size,
                          empty=True)

        # Count named symbols with a zero section index.
        # NOTE(review): undefined_count is not used again in this method.
        undefined_count = 0
        for sym in self.syms:
            if not sym.shndx and sym.name:
                undefined_count += 1
        last_ea = max([self.base + seg.end for seg in self.segments])

        # Undefined symbols get consecutive placeholder addresses starting
        # just past the page-aligned end of the last segment.
        # NOTE(review): undef_offset is not used again in this method.
        undef_ea = self.page_align_up(last_ea) + 8
        undef_offset = self.base + plt_got_start
        for idx, symbol in enumerate(self.syms):
            if symbol.name:
                symbol.resolved = self.base + symbol.value
                decoded_type, decoded_name = self.try_unmangle(symbol.name)

                if symbol.shndx:
                    # Defined symbol: a function, or otherwise data.
                    if symbol.type == STT_FUNC:
                        self.create_user_function(symbol.resolved)
                        self.define_user_symbol(
                            Symbol(SymbolType.FunctionSymbol, symbol.resolved,
                                   decoded_name))

                        if decoded_type is not None:
                            self.get_function_at(
                                symbol.resolved).set_user_type(decoded_type)
                    else:
                        if decoded_type is not None:
                            self.define_data_var(symbol.resolved, decoded_type)
                        self.define_user_symbol(
                            Symbol(SymbolType.DataSymbol, symbol.resolved,
                                   decoded_name))
                else:
                    # Undefined symbol: defined as an imported function at
                    # the next placeholder address.
                    self.define_user_symbol(
                        Symbol(SymbolType.ImportedFunctionSymbol, undef_ea,
                               decoded_name))
                    undef_ea += offset_size

        # Apply relocations.  got_name_lookup maps a relocation offset to
        # the symbol name for the AArch64 symbol relocations with a zero
        # addend; it is used below to name PLT stubs.
        got_name_lookup = {}
        for offset, r_type, symbol, addend in self.relocations:
            target = self.base + offset
            if symbol != None:
                decoded_type, decoded_name = self.try_unmangle(symbol.name)
                if decoded_type != None:
                    self.define_data_var(
                        target,
                        Type.pointer(Architecture[self.ARCH], decoded_type))
                self.define_auto_symbol(
                    Symbol(SymbolType.DataSymbol, target, decoded_name))
            else:
                decoded_type = decoded_name = None

            # `packed` holds the little-endian bytes to store at `target`;
            # it stays None for relocation types not handled below.
            packed = None
            offset_raw = None
            if r_type in [R_ARM_GLOB_DAT, R_ARM_JUMP_SLOT, R_ARM_ABS32]:
                if symbol:
                    offset_raw = symbol.resolved
                    packed = pack(LE + UNSIGNED_SIZE_MAP[4], offset_raw)
            elif r_type == R_ARM_RELATIVE:
                # NOTE(review): this seeks to base + offset, whereas the
                # other reads in this method seek to HDR_SIZE + offset --
                # confirm which address space the reader uses.
                self.reader.seek(target)
                offset_raw = self.base + self.reader.read32()
                packed = pack(LE + UNSIGNED_SIZE_MAP[4], offset_raw)
            elif r_type in [
                    R_AARCH64_GLOB_DAT, R_AARCH64_JUMP_SLOT, R_AARCH64_ABS64
            ]:
                # NOTE(review): unlike the ARM branch above, `symbol` is
                # dereferenced here without a None check.
                offset_raw = symbol.resolved + addend
                packed = pack(LE + UNSIGNED_SIZE_MAP[8], offset_raw)
                if addend == 0:
                    got_name_lookup[offset] = symbol.name
            elif r_type == R_AARCH64_RELATIVE:
                offset_raw = self.base + addend
                packed = pack(LE + UNSIGNED_SIZE_MAP[8], offset_raw)

            if packed is not None:
                # Only values below the end of .text (excluding base and
                # base + 0x10) are treated as function pointers; the write
                # of the relocated value is inside this condition as well.
                if offset_raw != self.base and offset_raw != self.base + 0x10 and offset_raw < self.base + self.text_offset + self.text_size:
                    self.create_user_function(offset_raw)
                    if decoded_type is not None:
                        self.get_function_at(offset_raw).set_user_type(
                            decoded_type)
                    self.write(target, packed)

        # Name each PLT stub after the symbol its GOT slot refers to.
        for func, target in self.plt_entries:
            if target in got_name_lookup:
                addr = self.base + func
                decoded_type, decoded_name = self.try_unmangle(
                    got_name_lookup[target])
                self.define_user_symbol(
                    Symbol(SymbolType.ImportedFunctionSymbol, addr,
                           decoded_name))

        # Try to find entrypoint if not already set
        if self.entrypoint == 0:
            for sym in self.syms:
                # The name is compared against a bytes literal, so symbol
                # names are presumably bytes -- verify against get_dynstr.
                if sym.name == b'_init':
                    self.entrypoint = sym.resolved
                    break
        if self.entrypoint != 0:
            self.add_entry_point(self.entrypoint)

        return True
Example #24
0
import os, sys, re, random, json

import binaryninja
from binaryninja import typelibrary
from binaryninja.types import Type, TypeBuilder, QualifiedName
from binaryninja.enums import NamedTypeReferenceClass

# -----------------------------------------------------------------------------
# attempt #1: use the static named_type_reference() function in Type,
# -----------------------------------------------------------------------------

# Build a named type reference and ask for it to be const at creation time.
foo = Type.named_type_reference(
    NamedTypeReferenceClass.UnionNamedTypeClass,  # type_class
    'pthread_attr_t',  # name
    'libc.so.6:["pthread_attr_t"]',  # type_id
    1,  # alignment
    56,  # width
    True,  # const
    False,  # volatile
)
print(repr(foo))

# The attempt succeeds if the reference is const, either straight from the
# constructor or after assigning the attribute; it fails only when that
# assignment is rejected with AttributeError.
if not foo.const:
    try:
        foo.const = True
        print("SUCCEEDED")
    except AttributeError:
        print("FAILED")
else:
    print("SUCCEEDED")
Example #25
0
                if view.address_size == 4
                else br.read16()
                if view.address_size == 2
                else br.read8()
            )

            # If this field is pointing at a symbol, let's rename the field to be that symbol
            if ptr_symbol := view.get_symbol_at(ptr):
                field_name = simplify_name_to_string(ptr_symbol.short_name).split("::")[
                    -1
                ]

            # If this field points to a function, we should make a function pointer for that
            # function. This is very useful for C++ vtables and other function tables.
            if function := view.get_function_at(ptr):
                type_ = Type.pointer(function.arch, function.function_type)

            # If this field points to a data variable, we should make a pointer to that type.
            elif ptr_dv := view.get_data_var_at(ptr):
                type_ = Type.pointer(view.arch, ptr_dv.type)

        # Determine if this structure should be packed based on the alignment of the field.
        if offset % type_.width != 0:
            structure.packed = True

        structure.insert(offset, type_, field_name)

        if (dv := view.get_next_data_var_after(dv.address)) is None:
            break

        offset = dv.address - start
    def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None):
        """Convert a demangler AST node into a Binary Ninja type.

        ``arch``, ``ty_for_cxx_builtin``, ``Node`` and ``log`` are taken from
        the enclosing scope.

        :param node: AST node; dispatch is on ``node.kind``.
        :param cv_qual: qualifiers (``'const'`` / ``'volatile'``) collected
            from an enclosing ``cv_qual`` node; applied to pointers and
            references only.
        :param arg_count_hint: pass a value only for the top-level call on
            a ``func`` node.  It is the expected number of arguments and is
            used to decide whether an implicit ``this`` argument must be
            prepended.
        :return: ``None`` if the node (or any node it contains) cannot be
            converted.  For a ``func`` node with ``arg_count_hint`` given,
            a ``(this_arg, type)`` tuple; in every other case the type.
        """
        if node.kind == 'builtin':
            # Builtins without a table entry cannot be converted.
            return ty_for_cxx_builtin.get(node.value)

        if node.kind in ['name', 'qual_name']:
            named_ty_ref = NamedTypeReference(name=str(node))
            return Type.named_type(named_ty_ref)

        if node.kind in ['pointer', 'lvalue', 'rvalue']:
            pointee_ty = ty_from_demangler_node(node.value)
            if pointee_ty is None:
                return None
            is_const = 'const' in cv_qual
            is_volatile = 'volatile' in cv_qual
            if node.kind == 'pointer':
                return Type.pointer(arch, pointee_ty, is_const, is_volatile)
            # References are pointers carrying a reference kind.
            if node.kind == 'lvalue':
                ref_type = ReferenceType.ReferenceReferenceType
            else:
                ref_type = ReferenceType.RValueReferenceType
            return Type.pointer(arch,
                                pointee_ty,
                                is_const,
                                is_volatile,
                                ref_type=ref_type)

        if node.kind == 'cv_qual':
            # Unwrap and hand the qualifiers to the wrapped node.
            return ty_from_demangler_node(node.value, cv_qual=node.qual)

        if node.kind == 'func':
            has_qual_name = bool(node.name) and node.name.kind == 'qual_name'

            is_ctor_dtor = False
            if has_qual_name:
                qual_name = node.name.value
                if qual_name[-1].kind in ['ctor', 'dtor']:
                    is_ctor_dtor = True

            if is_ctor_dtor:
                ret_ty = Type.void()
            elif node.ret_ty is not None:
                ret_ty = ty_from_demangler_node(node.ret_ty)
                if ret_ty is None:
                    return None
            else:
                # No return type in the AST: default-sized int with zero
                # confidence.
                ret_ty = Type.int(arch.default_int_size).with_confidence(0)

            arg_nodes = list(node.arg_tys)
            arg_tys = []

            var_arg = False
            # Guard against an empty argument list before indexing into it.
            if arg_nodes:
                last_arg, first_arg = arg_nodes[-1], arg_nodes[0]
                if last_arg.kind == 'builtin' and last_arg.value == '...':
                    # Trailing ellipsis marks a variadic function.
                    arg_nodes.pop()
                    var_arg = True
                elif first_arg.kind == 'builtin' and first_arg.value == 'void':
                    # A leading `void` stands for "no arguments".
                    arg_nodes = arg_nodes[1:]

            this_arg = False
            if has_qual_name:
                qual_name = node.name.value
                # Constructors/destructors always take `this`; otherwise
                # infer it when exactly one argument is unaccounted for.
                if is_ctor_dtor or (arg_count_hint is not None
                                    and len(arg_nodes) == arg_count_hint - 1):
                    this_arg = True
                    this_node = Node('qual_name', qual_name[:-1])
                    this_ty = ty_from_demangler_node(this_node)
                    if this_ty is None:
                        return None
                    arg_tys.append(Type.pointer(arch, this_ty))

            for arg_node in arg_nodes:
                arg_ty = ty_from_demangler_node(arg_node)
                if arg_ty is None:
                    return None
                arg_tys.append(arg_ty)

            ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg)
            if arg_count_hint is not None:
                # toplevel invocation, so return whether we inferred a this argument
                return this_arg, ty
            return ty

        log.log_warn("Cannot convert demangled AST {} to a type".format(
            repr(node)))
        return None
def analyze_cxx_abi(view, start=None, length=None, task=None):
    platform = view.platform
    arch = platform.arch

    void_p_ty = Type.pointer(arch, Type.void())
    char_p_ty = Type.pointer(arch, Type.int(1))
    unsigned_int_ty = Type.int(arch.default_int_size, False)
    signed_int_ty = Type.int(arch.default_int_size, True)

    base_type_info_ty = Type.named_type(
        NamedTypeReference(name='std::type_info'))
    base_type_info_ptr_ty = Type.pointer(arch, base_type_info_ty)

    def char_array_ty(length):
        return Type.array(Type.int(1), strings[0].length)

    def type_info_ty(kind=None):
        type_info_struct = Structure()
        type_info_struct.append(void_p_ty, 'vtable')
        type_info_struct.append(char_p_ty, 'name')
        if kind == 'si_class':
            type_info_struct.append(base_type_info_ptr_ty, 'base_type')
        return Type.structure_type(type_info_struct)

    def vtable_ty(vfunc_count):
        vtable_struct = Structure()
        vtable_struct.append(signed_int_ty, 'top_offset')
        vtable_struct.append(base_type_info_ptr_ty, 'typeinfo')
        vtable_struct.append(Type.array(void_p_ty, vfunc_count), 'functions')
        return Type.structure_type(vtable_struct)

    if platform.name.startswith("windows-"):
        long_size = arch.default_int_size
    else:
        long_size = arch.address_size

    if arch.name.startswith('x86'):
        char_signed = True
    else:
        char_signed = False  # not always true

    short_size = 2  # not always true
    long_long_size = 8  # not always true

    ty_for_cxx_builtin = {
        'void': Type.void(),
        'wchar_t': Type.int(2, sign=char_signed, altname='wchar_t'),
        'bool': Type.bool(),
        'char': Type.int(1, sign=char_signed),
        'signed char': Type.int(1, sign=True),
        'unsigned char': Type.int(1, sign=False),
        'short': Type.int(short_size, sign=True),
        'unsigned short': Type.int(short_size, sign=False),
        'int': Type.int(arch.default_int_size, sign=True),
        'unsigned int': Type.int(arch.default_int_size, sign=False),
        'long': Type.int(long_size, sign=True),
        'unsigned long': Type.int(long_size, sign=False),
        'long long': Type.int(long_long_size, sign=True),
        'unsigned long long': Type.int(long_long_size, sign=False),
        '__int128': Type.int(16, sign=True),
        'unsigned __int128': Type.int(16, sign=False),
        'float': Type.float(4),
        'double': Type.float(8),
        '__float80': Type.float(10),
        '__float128': Type.float(16),
        'char32_t': Type.int(4, sign=char_signed, altname='char32_t'),
        'char16_t': Type.int(2, sign=char_signed, altname='char16_t'),
    }

    def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None):
        if node.kind == 'builtin':
            if node.value in ty_for_cxx_builtin:
                return ty_for_cxx_builtin[node.value]
            else:
                return None
        elif node.kind in ['name', 'qual_name']:
            named_ty_ref = NamedTypeReference(name=str(node))
            return Type.named_type(named_ty_ref)
        elif node.kind in ['pointer', 'lvalue', 'rvalue']:
            pointee_ty = ty_from_demangler_node(node.value)
            if pointee_ty is None:
                return None
            is_const = ('const' in cv_qual)
            is_volatile = ('volatile' in cv_qual)
            if node.kind == 'pointer':
                return Type.pointer(arch, pointee_ty, is_const, is_volatile)
            elif node.kind == 'lvalue':
                return Type.pointer(
                    arch,
                    pointee_ty,
                    is_const,
                    is_volatile,
                    ref_type=ReferenceType.ReferenceReferenceType)
            elif node.kind == 'rvalue':
                return Type.pointer(arch,
                                    pointee_ty,
                                    is_const,
                                    is_volatile,
                                    ref_type=ReferenceType.RValueReferenceType)
        elif node.kind == 'cv_qual':
            return ty_from_demangler_node(node.value, cv_qual=node.qual)
        elif node.kind == 'func':
            is_ctor_dtor = False
            if node.name and node.name.kind == 'qual_name':
                qual_name = node.name.value
                if qual_name[-1].kind in ['ctor', 'dtor']:
                    is_ctor_dtor = True

            if is_ctor_dtor:
                ret_ty = Type.void()
            elif node.ret_ty is not None:
                ret_ty = ty_from_demangler_node(node.ret_ty)
                if ret_ty is None:
                    return None
            else:
                ret_ty = Type.int(arch.default_int_size).with_confidence(0)

            arg_nodes = list(node.arg_tys)
            arg_tys = []

            var_arg = False
            if arg_nodes[-1].kind == 'builtin' and arg_nodes[-1].value == '...':
                arg_nodes.pop()
                var_arg = True
            elif arg_nodes[0].kind == 'builtin' and arg_nodes[
                    0].value == 'void':
                arg_nodes = arg_nodes[1:]

            this_arg = False
            if node.name and node.name.kind == 'qual_name':
                qual_name = node.name.value
                if is_ctor_dtor or (arg_count_hint is not None
                                    and len(arg_nodes) == arg_count_hint - 1):
                    this_arg = True
                    this_node = Node('qual_name', qual_name[:-1])
                    this_ty = ty_from_demangler_node(this_node)
                    if this_ty is None:
                        return None
                    arg_tys.append(Type.pointer(arch, this_ty))

            for arg_node in arg_nodes:
                arg_ty = ty_from_demangler_node(arg_node)
                if arg_ty is None:
                    return None
                arg_tys.append(arg_ty)

            ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg)
            if arg_count_hint is not None:
                # toplevel invocation, so return whether we inferred a this argument
                return this_arg, ty
            else:
                return ty
        else:
            log.log_warn("Cannot convert demangled AST {} to a type".format(
                repr(node)))

    reader = BinaryReader(view)

    def read(size):
        if size == 4:
            return reader.read32()
        elif size == 8:
            return reader.read64()
        else:
            assert False

    symbols = view.get_symbols(start, length)
    if task:
        task.set_total(len(symbols))

    mangled_re = re.compile('_?_Z')

    demangler_failures = 0
    for symbol in symbols:
        if task and not task.advance():
            break

        if not mangled_re.match(symbol.raw_name):
            continue

        is_data = (symbol.type == SymbolType.DataSymbol)
        is_code = (symbol.type in [
            SymbolType.FunctionSymbol, SymbolType.ImportedFunctionSymbol
        ])

        raw_name, suffix = symbol.raw_name, ''
        if '@' in raw_name:
            match = re.match(r'^(.+?)(@.+)$', raw_name)
            raw_name, suffix = match.group(1), match.group(2)

        try:
            name_ast = parse_mangled(raw_name)
            if name_ast is None:
                log.log_warn(
                    "Demangler failed to recognize {}".format(raw_name))
                demangler_failures += 1
        except NotImplementedError as e:
            log.log_warn("Demangler feature missing on {}: {}".format(
                raw_name, str(e)))
            demangler_failures += 1

        if name_ast:
            if name_ast.kind == 'func':
                short_name = str(name_ast.name)
            else:
                short_name = str(name_ast)
            symbol = Symbol(symbol.type,
                            symbol.address,
                            short_name=short_name + suffix,
                            full_name=str(name_ast) + suffix,
                            raw_name=symbol.raw_name)
        else:
            symbol = Symbol(symbol.type,
                            symbol.address,
                            short_name=symbol.raw_name,
                            full_name=None,
                            raw_name=symbol.raw_name)
        view.define_auto_symbol(symbol)

        if name_ast is None:
            continue

        elif is_data and name_ast.kind == 'typeinfo_name':
            strings = view.get_strings(symbol.address, 1)
            if not strings:
                continue

            view.define_data_var(symbol.address, char_array_ty(length))

        elif is_data and name_ast.kind == 'typeinfo':
            reader.offset = symbol.address + arch.address_size * 2

            kind = None

            # heuristic: is this an abi::__si_class_type_info?
            base_or_flags = read(arch.default_int_size)
            base_symbol = view.get_symbol_at(base_or_flags)
            if base_symbol and base_symbol.raw_name.startswith('_ZTI'):
                kind = 'si_class'

            view.define_data_var(symbol.address, type_info_ty(kind))

        elif is_data and name_ast.kind == 'vtable':
            vtable_addr = symbol.address

            reader.offset = vtable_addr + arch.address_size * 2
            while True:
                vfunc_count = 0
                check_next = True
                while True:
                    vfunc_ptr_symbol = view.get_symbol_at(reader.offset)
                    if vfunc_ptr_symbol and vfunc_ptr_symbol.raw_name.startswith(
                            '_Z'):
                        # any C++ symbol definitely terminates the vtable
                        check_next = False
                        break

                    # heuristic: existing function
                    vfunc_addr = read(arch.address_size)
                    if view.get_function_at(vfunc_addr):
                        vfunc_count += 1
                        continue

                    # explicitly reject null pointers; in position-independent code
                    # address zero can belong to the executable segment
                    if vfunc_addr == 0:
                        check_next = False
                        break

                    # heuristic: pointer to executable memory
                    vfunc_segment = view.get_segment_at(vfunc_addr)
                    if vfunc_addr != 0 and vfunc_segment and vfunc_segment.executable:
                        view.add_function(vfunc_addr)
                        vfunc_count += 1

                        log.log_info(
                            'Discovered function at {:#x} via {}'.format(
                                vfunc_addr, symbol.full_name
                                or symbol.short_name))
                        changed = True
                        continue

                    # we've fallen off the end of the vtable
                    break

                view.define_data_var(vtable_addr, vtable_ty(vfunc_count))

                if check_next:
                    # heuristic: can another vtable follow this one? let's see if it has typeinfo,
                    # since that should be always true for when we have a virtual base
                    typeinfo_ptr = read(arch.address_size)
                    typeinfo_ptr_symbol = view.get_symbol_at(typeinfo_ptr)
                    if typeinfo_ptr_symbol and typeinfo_ptr_symbol.raw_name.startswith(
                            '_ZTI'):
                        vtable_addr = reader.offset - 2 * arch.address_size

                        # document it with a symbol
                        secondary_symbol_name = '{}_secondary_{:x}'.format(
                            symbol.short_name, vtable_addr - symbol.address)
                        secondary_symbol = Symbol(
                            SymbolType.DataSymbol,
                            vtable_addr,
                            short_name=secondary_symbol_name)
                        view.define_auto_symbol(secondary_symbol)
                        continue

                break

        elif is_code and name_ast.kind == 'func':
            func = view.get_function_at(symbol.address)
            demangled = ty_from_demangler_node(
                name_ast, arg_count_hint=len(func.function_type.parameters))
            if demangled is not None:
                this_arg, ty = demangled
                func.apply_auto_discovered_type(ty)

    view.update_analysis()

    if demangler_failures:
        log.log_warn('{} demangler failures'.format(demangler_failures))
 def char_array_ty(length):
     """Return an array type of 1-byte integers sized by `strings[0].length`.

     NOTE(review): the `length` parameter is never read; the element count
     comes from `strings`, which is not defined in this block and is
     presumably resolved from an enclosing scope at call time -- confirm
     against the caller before relying on `length`.
     """
     element_ty = Type.int(1)
     return Type.array(element_ty, strings[0].length)
Example #29
0
def process_msvc_func(func):
    """Rebuild `func.function_type` from its MSVC-mangled symbol name.

    The raw symbol name is passed to `demangle_ms`. Nothing is changed when
    the demangled type is missing or is not a function type, or when the name
    parts come back as a plain string. Otherwise a new function type is
    assembled from the demangled parameters, return value and calling
    convention keyword, with two possible leading parameters added:

    * `retptr` when the return value is a named class/struct/union reference
      (the return type is then replaced by a pointer to it);
    * `this` for member/virtual functions that are not static and whose name
      has at least two parts.

    Returns None in every case; the result is stored on `func`.
    """
    arch, plat = func.arch, func.platform

    sym_type, sym_parts = demangle_ms(arch, func.symbol.raw_name)

    # Only demangled function types are of interest.
    if sym_type is None:
        return
    if sym_type.type_class != TypeClass.FunctionTypeClass:
        return

    # A plain string instead of separate name parts cannot be processed.
    if isinstance(sym_parts, str):
        return

    # Collect parameter types, dropping any that are void.
    params = []
    for param in sym_type.parameters:
        if param.type.type_class != TypeClass.VoidTypeClass:
            params.append(param.type)
    return_type = sym_type.return_value

    leading_tokens = [str(tok) for tok in sym_type.get_tokens_before_name()]

    is_member = any(
        access in leading_tokens
        for access in ('public:', 'protected:', 'private:'))
    is_static = 'static' in leading_tokens
    is_virtual = 'virtual' in leading_tokens

    # The first matching keyword wins; lookups are wrapped in lambdas so only
    # the selected convention attribute is actually accessed.
    convention = plat.default_calling_convention
    convention_lookups = (
        ('__cdecl', lambda: plat.cdecl_calling_convention),
        ('__stdcall', lambda: plat.stdcall_calling_convention),
        ('__fastcall', lambda: plat.fastcall_calling_convention),
        ('__thiscall', lambda: arch.calling_conventions['thiscall']),
    )
    for keyword, lookup in convention_lookups:
        if keyword in leading_tokens:
            convention = lookup()
            break

    if return_type.type_class == TypeClass.NamedTypeReferenceClass:
        referenced_class = return_type.named_type_reference.type_class
        if referenced_class in {
                NamedTypeReferenceClass.ClassNamedTypeClass,
                NamedTypeReferenceClass.StructNamedTypeClass,
                NamedTypeReferenceClass.UnionNamedTypeClass
        }:
            # TODO: This should only be added for large/non-trivial types
            return_type = Type.pointer(arch, return_type)
            params.insert(0, FunctionParameter(return_type, name="retptr"))

    takes_this = (len(sym_parts) >= 2 and (is_member or is_virtual)
                  and not is_static)
    if takes_this:
        # Everything but the last name part names the owning type.
        owner_name = '::'.join(sym_parts[:-1])
        owner_ref = NamedTypeReference(
            NamedTypeReferenceClass.StructNamedTypeClass, name=owner_name)
        this_type = Type.pointer(arch, Type.named_type(owner_ref))
        params.insert(0, FunctionParameter(this_type, name="this"))

        # Function named like its enclosing type with a void return
        # (presumably a constructor): report the `this` pointer as the result.
        if (sym_parts[-1] == sym_parts[-2]
                and return_type.type_class == TypeClass.VoidTypeClass):
            return_type = this_type

    func.function_type = Type.function(return_type, params, convention,
                                       sym_type.has_variable_arguments)
 def vtable_ty(vfunc_count):
     """Build a structure type describing a vtable with `vfunc_count` slots.

     The structure has three members, appended in this order: `top_offset`,
     `typeinfo`, and `functions` (an array of `vfunc_count` entries of
     `void_p_ty`). The member types `signed_int_ty`, `base_type_info_ptr_ty`
     and `void_p_ty` are not defined in this block and are taken from the
     surrounding scope.
     """
     layout = Structure()
     members = (
         (signed_int_ty, 'top_offset'),
         (base_type_info_ptr_ty, 'typeinfo'),
         (Type.array(void_p_ty, vfunc_count), 'functions'),
     )
     for member_ty, member_name in members:
         layout.append(member_ty, member_name)
     return Type.structure_type(layout)