def get_xrefs(bv, func, il):
    """Collect the cross-references made by the instruction at `il.address`.

    Args:
        bv (binja.BinaryView)
        func: function containing the instruction; only `func.arch.name` is
            read here
        il: instruction object; only `il.address` is read here (presumably a
            Binary Ninja IL instruction -- confirm against callers)

    Returns:
        set: the references that were found. Each element is expected to
            expose an `.addr` attribute.
    """
    refs = set()

    # `get_disassembly` may return None when no instruction text is
    # available; normalize to an empty string so that the prefix checks
    # below cannot raise `AttributeError`.
    dis = bv.get_disassembly(il.address) or ''
    is_adr = dis.startswith('adr ')

    # TODO(pag): This is an ugly hack for the ADRP instruction on AArch64.
    ref = _get_aarch64_partial_xref(bv, func, il, dis)
    if ref is not None:
        refs.add(ref)
    else:
        reftype = XRef.IMMEDIATE

        # PC-relative displacement for AArch64's `adr` instruction.
        if func.arch.name == 'aarch64' and is_adr:
            reftype = XRef.DISPLACEMENT

        _fill_xrefs_internal(bv, il, refs, reftype)

    # TODO(pag): Another ugly hack to deal with a specific flavor of jump
    #            table that McSema doesn't handle very well. The specific form
    #            is:
    #
    #    .text:00000000004009AC ADRP   X1, #asc_400E5C@PAGE ; "\b"
    #    .text:00000000004009B0 ADD    X1, X1, #asc_400E5C@PAGEOFF ; "\b"
    #    .text:00000000004009B4 LDR    W0, [X1,W0,UXTW#2]
    #    .text:00000000004009B8 ADR    X1, loc_4009C4    <-- point to a block
    #    .text:00000000004009BC ADD    X0, X1, W0,SXTW#2
    #    .text:00000000004009C0 BR     X0
    #
    # We don't have good ways of referencing basic blocks, so if we
    # left the reference from `4009B8` to `4009C4`, then that would
    # be computed in terms of the location in memory of the copied
    # `.text` segment in the lifted binary.
    #
    # We could handle this via a jump-offset table with offset of
    # `4009B8`, but we don't yet support this variant of jump table
    # in jmptable.py.
    if is_adr and refs:
        # Only one (arbitrary) reference is inspected, as before.
        ref = refs.pop()
        if util.is_code(bv, ref.addr) and not bv.get_function_at(ref.addr):
            DEBUG("WARNING: Omitting reference to non-function code address {:x}"
                  .format(ref.addr))
        else:
            refs.add(ref)  # Add it back in.

    return refs
def get_xrefs(bv, func, il):
    """Collect the cross-references made by the instruction at `il.address`.

    Args:
        bv (binja.BinaryView)
        func: function containing the instruction; only `func.arch.name` is
            read here
        il: instruction object; only `il.address` is read here (presumably a
            Binary Ninja IL instruction -- confirm against callers)

    Returns:
        set: the references that were found. Each element is expected to
            expose an `.addr` attribute.
    """
    refs = set()

    # `get_disassembly` may return None when no instruction text is
    # available; normalize to an empty string so that the prefix checks
    # below cannot raise `AttributeError`.
    dis = bv.get_disassembly(il.address) or ''
    is_adr = dis.startswith('adr ')

    # TODO(pag): This is an ugly hack for the ADRP instruction on AArch64.
    ref = _get_aarch64_partial_xref(bv, func, il, dis)
    if ref is not None:
        refs.add(ref)
    else:
        reftype = XRef.IMMEDIATE

        # PC-relative displacement for AArch64's `adr` instruction.
        if func.arch.name == 'aarch64' and is_adr:
            reftype = XRef.DISPLACEMENT

        _fill_xrefs_internal(bv, il, refs, reftype)

    # TODO(pag): Another ugly hack to deal with a specific flavor of jump
    #            table that McSema doesn't handle very well. The specific form
    #            is:
    #
    #    .text:00000000004009AC ADRP   X1, #asc_400E5C@PAGE ; "\b"
    #    .text:00000000004009B0 ADD    X1, X1, #asc_400E5C@PAGEOFF ; "\b"
    #    .text:00000000004009B4 LDR    W0, [X1,W0,UXTW#2]
    #    .text:00000000004009B8 ADR    X1, loc_4009C4    <-- point to a block
    #    .text:00000000004009BC ADD    X0, X1, W0,SXTW#2
    #    .text:00000000004009C0 BR     X0
    #
    # We don't have good ways of referencing basic blocks, so if we
    # left the reference from `4009B8` to `4009C4`, then that would
    # be computed in terms of the location in memory of the copied
    # `.text` segment in the lifted binary.
    #
    # We could handle this via a jump-offset table with offset of
    # `4009B8`, but we don't yet support this variant of jump table
    # in jmptable.py.
    if is_adr and refs:
        # Only one (arbitrary) reference is inspected, as before.
        ref = refs.pop()
        if util.is_code(bv, ref.addr) and not bv.get_function_at(ref.addr):
            DEBUG("WARNING: Omitting reference to non-function code address {:x}"
                  .format(ref.addr))
        else:
            refs.add(ref)  # Add it back in.

    return refs
def recover_section_cross_references(bv, pb_seg, real_sect, sect_start, sect_end):
    """Find references to other code/data in this section

    Walks `[sect_start, sect_end)` in steps of the entry width, reads one
    value per step and, when that value is a valid address that is not a
    known jump-table target, appends a data reference to `pb_seg.xrefs`.

    Args:
        bv (binja.BinaryView)
        pb_seg (CFG_pb2.Segment)
        real_sect (binja.binaryview.Section)
        sect_start (int)
        sect_end (int)
    """
    entry_width = util.clamp(real_sect.align, 4, bv.address_size)
    # Raises KeyError for any clamped width other than 4 or 8.
    read_val = {4: util.read_dword, 8: util.read_qword}[entry_width]

    DEBUG("Recovering references in [{:x}, {:x}) of section {}".format(
        sect_start, sect_end, real_sect.name))

    # The width label is the same for every entry, so look it up once.
    width_name = _BYTE_WIDTH_NAME.get(entry_width,
                                      "{}-byte".format(entry_width))

    DEBUG_PUSH()
    try:
        for addr in xrange(sect_start, sect_end, entry_width):
            xref = read_val(bv, addr)

            if not util.is_valid_addr(bv, xref):
                continue

            # Skip this xref if it's a jmp table entry
            if any(xref in tbl.targets for tbl in JMP_TABLES):
                continue

            DEBUG("Adding {} reference from {:x} to {:x}".format(
                width_name, addr, xref))

            pb_ref = pb_seg.xrefs.add()
            pb_ref.ea = addr
            pb_ref.width = entry_width
            pb_ref.target_ea = xref
            pb_ref.target_name = util.find_symbol_name(bv, xref)
            pb_ref.target_is_code = util.is_code(bv, xref)

            # The fixup kind depends on where the reference itself lives
            # (`addr`), not on where it points.
            if util.is_tls_section(bv, addr):
                pb_ref.target_fixup_kind = \
                    CFG_pb2.DataReference.OffsetFromThreadBase
            else:
                pb_ref.target_fixup_kind = CFG_pb2.DataReference.Absolute
    finally:
        # Keep DEBUG_PUSH/DEBUG_POP balanced even if a read or a protobuf
        # assignment raises part-way through the section.
        DEBUG_POP()
def recover_section_cross_references(bv, pb_seg, real_sect, sect_start, sect_end):
    """Find references to other code/data in this section

    Walks `[sect_start, sect_end)` in steps of the entry width, reads one
    value per step and, when that value is a valid address that is not a
    known jump-table target, appends a data reference to `pb_seg.xrefs`.

    Args:
        bv (binja.BinaryView)
        pb_seg (CFG_pb2.Segment)
        real_sect (binja.binaryview.Section)
        sect_start (int)
        sect_end (int)
    """
    entry_width = util.clamp(real_sect.align, 4, bv.address_size)
    # Raises KeyError for any clamped width other than 4 or 8.
    read_val = {4: util.read_dword, 8: util.read_qword}[entry_width]

    DEBUG("Recovering references in [{:x}, {:x}) of section {}".format(
        sect_start, sect_end, real_sect.name))

    # The width label is the same for every entry, so look it up once.
    width_name = _BYTE_WIDTH_NAME.get(entry_width,
                                      "{}-byte".format(entry_width))

    DEBUG_PUSH()
    try:
        for addr in xrange(sect_start, sect_end, entry_width):
            xref = read_val(bv, addr)

            if not util.is_valid_addr(bv, xref):
                continue

            # Skip this xref if it's a jmp table entry
            if any(xref in tbl.targets for tbl in JMP_TABLES):
                continue

            DEBUG("Adding {} reference from {:x} to {:x}".format(
                width_name, addr, xref))

            pb_ref = pb_seg.xrefs.add()
            pb_ref.ea = addr
            pb_ref.width = entry_width
            pb_ref.target_ea = xref
            pb_ref.target_name = util.find_symbol_name(bv, xref)
            pb_ref.target_is_code = util.is_code(bv, xref)

            # The fixup kind depends on where the reference itself lives
            # (`addr`), not on where it points.
            if util.is_tls_section(bv, addr):
                pb_ref.target_fixup_kind = \
                    CFG_pb2.DataReference.OffsetFromThreadBase
            else:
                pb_ref.target_fixup_kind = CFG_pb2.DataReference.Absolute
    finally:
        # Keep DEBUG_PUSH/DEBUG_POP balanced even if a read or a protobuf
        # assignment raises part-way through the section.
        DEBUG_POP()
def add_xref(bv, pb_inst, target, mask, optype):
    """Record a reference from an instruction to `target`.

    A new entry is appended to `pb_inst.xrefs` and filled in with the target
    address, operand type, optional mask, optional symbol name, target type
    (code/data) and location (external/internal). If a function starts at
    `target`, it is queued for recovery.

    Args:
        bv (binja.BinaryView)
        pb_inst: message whose `xrefs` collection receives the new entry
        target (int): address being referenced
        mask: stored on the entry (and shown in the summary) only when truthy
        optype: operand type; must be a key of `_CFG_INST_XREF_TYPE_TO_NAME`

    Returns:
        str: debug summary of the form
            "(<code|data> <operand type> <external|internal> <target>[ & <mask>] <symbol>)"
            with the address and mask rendered in hex.
    """
    entry = pb_inst.xrefs.add()
    entry.ea = target
    entry.operand_type = optype

    if mask:
        entry.mask = mask
        mask_text = " & {:x}".format(mask)
    else:
        mask_text = ""

    symbol = util.find_symbol_name(bv, target)
    if len(symbol) > 0:
        entry.name = symbol

    points_at_code = util.is_code(bv, target)
    entry.target_type = (CFG_pb2.CodeReference.CodeTarget if points_at_code
                         else CFG_pb2.CodeReference.DataTarget)
    kind_text = "code" if points_at_code else "data"

    is_external = util.is_external_ref(bv, target)
    entry.location = (CFG_pb2.CodeReference.External if is_external
                      else CFG_pb2.CodeReference.Internal)
    where_text = "external" if is_external else "internal"

    # If the target happens to be a function, queue it for recovery
    if bv.get_function_at(target) is not None:
        queue_func(target)

    return "({} {} {} {:x}{} {})".format(
        kind_text, _CFG_INST_XREF_TYPE_TO_NAME[optype], where_text,
        target, mask_text, symbol)
def add_xref(bv, pb_inst, target, mask, optype):
    """Record a reference from an instruction to `target`.

    A new entry is appended to `pb_inst.xrefs` and filled in with the target
    address, operand type, optional mask, optional symbol name, target type
    (code/data) and location (external/internal). If a function starts at
    `target`, it is queued for recovery.

    Args:
        bv (binja.BinaryView)
        pb_inst: message whose `xrefs` collection receives the new entry
        target (int): address being referenced
        mask: stored on the entry (and shown in the summary) only when truthy
        optype: operand type; must be a key of `_CFG_INST_XREF_TYPE_TO_NAME`

    Returns:
        str: debug summary of the form
            "(<code|data> <operand type> <external|internal> <target>[ & <mask>] <symbol>)"
            with the address and mask rendered in hex.
    """
    entry = pb_inst.xrefs.add()
    entry.ea = target
    entry.operand_type = optype

    if mask:
        entry.mask = mask
        mask_text = " & {:x}".format(mask)
    else:
        mask_text = ""

    symbol = util.find_symbol_name(bv, target)
    if len(symbol) > 0:
        entry.name = symbol

    points_at_code = util.is_code(bv, target)
    entry.target_type = (CFG_pb2.CodeReference.CodeTarget if points_at_code
                         else CFG_pb2.CodeReference.DataTarget)
    kind_text = "code" if points_at_code else "data"

    is_external = util.is_external_ref(bv, target)
    entry.location = (CFG_pb2.CodeReference.External if is_external
                      else CFG_pb2.CodeReference.Internal)
    where_text = "external" if is_external else "internal"

    # If the target happens to be a function, queue it for recovery
    if bv.get_function_at(target) is not None:
        queue_func(target)

    return "({} {} {} {:x}{} {})".format(
        kind_text, _CFG_INST_XREF_TYPE_TO_NAME[optype], where_text,
        target, mask_text, symbol)