Example #1
0
def get_xrefs(bv, func, il):
    """ Collect the cross-references made by the instruction at `il.address`

    Args:
      bv (binja.BinaryView)
      func: function containing the instruction; only `func.arch.name` is
        read here, the object is otherwise handed to the helpers
      il: IL instruction; `il.address` selects the disassembly to inspect

    Returns:
      set: the references found for the instruction (possibly empty)
    """
    refs = set()
    dis = bv.get_disassembly(il.address)

    # TODO(pag): This is an ugly hack for the ADRP instruction on AArch64.
    ref = _get_aarch64_partial_xref(bv, func, il, dis)
    if ref is not None:
        refs.add(ref)
        return refs

    is_adr = dis.startswith('adr ')

    # PC-relative displacement for AArch64's `adr` instruction.
    if func.arch.name == 'aarch64' and is_adr:
        reftype = XRef.DISPLACEMENT
    else:
        reftype = XRef.IMMEDIATE

    _fill_xrefs_internal(bv, il, refs, reftype)

    # TODO(pag): Another ugly hack to deal with a specific flavor of jump
    #            table that McSema doesn't handle very well. The specific form
    #            is:
    #
    #    .text:00000000004009AC ADRP            X1, #asc_400E5C@PAGE ; "\b"
    #    .text:00000000004009B0 ADD             X1, X1, #asc_400E5C@PAGEOFF ; "\b"
    #    .text:00000000004009B4 LDR             W0, [X1,W0,UXTW#2]
    #    .text:00000000004009B8 ADR             X1, loc_4009C4   <-- point to a block
    #    .text:00000000004009BC ADD             X0, X1, W0,SXTW#2
    #    .text:00000000004009C0 BR              X0
    #
    #            We don't have good ways of referencing basic blocks, so if we
    #            left the reference from `4009B8` to `4009C4`, then that would
    #            be computed in terms of the location in memory of the copied
    #            `.text` segment in the lifted binary.
    #
    #            We could handle this via a jump-offset table with offset of
    #            `4009B8`, but we don't yet support this variant of jump table
    #            in jmptable.py.
    if is_adr:
        # Check every reference rather than an arbitrary `set.pop()` pick, so
        # the result does not depend on set ordering. Iterate over a snapshot
        # because entries are removed from `refs` as we go.
        for ref in list(refs):
            if util.is_code(bv, ref.addr) and not bv.get_function_at(ref.addr):
                DEBUG(
                    "WARNING: Omitting reference to non-function code address {:x}"
                    .format(ref.addr))
                refs.discard(ref)

    return refs
Example #2
0
def get_xrefs(bv, func, il):
  """ Collect the cross-references made by the instruction at `il.address`

  Args:
    bv (binja.BinaryView)
    func: function containing the instruction; only `func.arch.name` is
      read here, the object is otherwise handed to the helpers
    il: IL instruction; `il.address` selects the disassembly to inspect

  Returns:
    set: the references found for the instruction (possibly empty)
  """
  refs = set()
  dis = bv.get_disassembly(il.address)

  # TODO(pag): This is an ugly hack for the ADRP instruction on AArch64.
  ref = _get_aarch64_partial_xref(bv, func, il, dis)
  if ref is not None:
    refs.add(ref)
    return refs

  is_adr = dis.startswith('adr ')

  # PC-relative displacement for AArch64's `adr` instruction.
  if func.arch.name == 'aarch64' and is_adr:
    reftype = XRef.DISPLACEMENT
  else:
    reftype = XRef.IMMEDIATE

  _fill_xrefs_internal(bv, il, refs, reftype)

  # TODO(pag): Another ugly hack to deal with a specific flavor of jump
  #            table that McSema doesn't handle very well. The specific form
  #            is:
  #
  #    .text:00000000004009AC ADRP            X1, #asc_400E5C@PAGE ; "\b"
  #    .text:00000000004009B0 ADD             X1, X1, #asc_400E5C@PAGEOFF ; "\b"
  #    .text:00000000004009B4 LDR             W0, [X1,W0,UXTW#2]
  #    .text:00000000004009B8 ADR             X1, loc_4009C4   <-- point to a block
  #    .text:00000000004009BC ADD             X0, X1, W0,SXTW#2
  #    .text:00000000004009C0 BR              X0
  #
  #            We don't have good ways of referencing basic blocks, so if we
  #            left the reference from `4009B8` to `4009C4`, then that would
  #            be computed in terms of the location in memory of the copied
  #            `.text` segment in the lifted binary.
  #
  #            We could handle this via a jump-offset table with offset of
  #            `4009B8`, but we don't yet support this variant of jump table
  #            in jmptable.py.
  if is_adr:
    # Check every reference rather than an arbitrary `set.pop()` pick, so the
    # result does not depend on set ordering. Iterate over a snapshot because
    # entries are removed from `refs` as we go.
    for ref in list(refs):
      if util.is_code(bv, ref.addr) and not bv.get_function_at(ref.addr):
        DEBUG("WARNING: Omitting reference to non-function code address {:x}".format(ref.addr))
        refs.discard(ref)

  return refs
Example #3
0
def recover_section_cross_references(bv, pb_seg, real_sect, sect_start,
                                     sect_end):
    """ Find references to other code/data in this section

    Steps through [sect_start, sect_end) one entry at a time, reads the value
    stored at each position and, when that value is a valid address that is
    not a known jump table target, appends a reference to `pb_seg.xrefs`.

    Args:
      bv (binja.BinaryView)
      pb_seg (CFG_pb2.Segment)
      real_sect (binja.binaryview.Section)
      sect_start (int)
      sect_end (int)

    Raises:
      KeyError: if the computed entry width is neither 4 nor 8
    """
    # Entry size comes from the section alignment, bounded by 4 and the
    # address size through util.clamp. Only 4 and 8 byte readers exist.
    entry_width = util.clamp(real_sect.align, 4, bv.address_size)
    read_val = {4: util.read_dword, 8: util.read_qword}[entry_width]

    # The width never changes inside the loop, so name it once up front.
    width_name = _BYTE_WIDTH_NAME.get(entry_width,
                                      "{}-byte".format(entry_width))

    DEBUG("Recovering references in [{:x}, {:x}) of section {}".format(
        sect_start, sect_end, real_sect.name))

    DEBUG_PUSH()
    try:
        for addr in xrange(sect_start, sect_end, entry_width):
            xref = read_val(bv, addr)

            if not util.is_valid_addr(bv, xref):
                continue

            # Skip this xref if it's a jmp table entry
            if any(xref in tbl.targets for tbl in JMP_TABLES):
                continue

            DEBUG("Adding {} reference from {:x} to {:x}".format(
                width_name, addr, xref))

            pb_ref = pb_seg.xrefs.add()
            pb_ref.ea = addr
            pb_ref.width = entry_width
            pb_ref.target_ea = xref
            pb_ref.target_name = util.find_symbol_name(bv, xref)
            pb_ref.target_is_code = util.is_code(bv, xref)

            # The fixup kind depends on where the reference lives (`addr`),
            # not on where it points.
            if util.is_tls_section(bv, addr):
                pb_ref.target_fixup_kind = CFG_pb2.DataReference.OffsetFromThreadBase
            else:
                pb_ref.target_fixup_kind = CFG_pb2.DataReference.Absolute
    finally:
        # Always balance DEBUG_PUSH, even when a read or lookup above raises.
        DEBUG_POP()
Example #4
0
def recover_section_cross_references(bv, pb_seg, real_sect, sect_start, sect_end):
  """ Find references to other code/data in this section

  Steps through [sect_start, sect_end) one entry at a time, reads the value
  stored at each position and, when that value is a valid address that is
  not a known jump table target, appends a reference to `pb_seg.xrefs`.

  Args:
    bv (binja.BinaryView)
    pb_seg (CFG_pb2.Segment)
    real_sect (binja.binaryview.Section)
    sect_start (int)
    sect_end (int)

  Raises:
    KeyError: if the computed entry width is neither 4 nor 8
  """
  # Entry size comes from the section alignment, bounded by 4 and the
  # address size through util.clamp. Only 4 and 8 byte readers exist.
  entry_width = util.clamp(real_sect.align, 4, bv.address_size)
  read_val = {4: util.read_dword,
              8: util.read_qword}[entry_width]

  # The width never changes inside the loop, so name it once up front.
  width_name = _BYTE_WIDTH_NAME.get(entry_width, "{}-byte".format(entry_width))

  DEBUG("Recovering references in [{:x}, {:x}) of section {}".format(
      sect_start, sect_end, real_sect.name))

  DEBUG_PUSH()
  try:
    for addr in xrange(sect_start, sect_end, entry_width):
      xref = read_val(bv, addr)

      if not util.is_valid_addr(bv, xref):
        continue

      # Skip this xref if it's a jmp table entry
      if any(xref in tbl.targets for tbl in JMP_TABLES):
        continue

      DEBUG("Adding {} reference from {:x} to {:x}".format(width_name, addr, xref))

      pb_ref = pb_seg.xrefs.add()
      pb_ref.ea = addr
      pb_ref.width = entry_width
      pb_ref.target_ea = xref
      pb_ref.target_name = util.find_symbol_name(bv, xref)
      pb_ref.target_is_code = util.is_code(bv, xref)

      # The fixup kind depends on where the reference lives (`addr`), not on
      # where it points.
      if util.is_tls_section(bv, addr):
        pb_ref.target_fixup_kind = CFG_pb2.DataReference.OffsetFromThreadBase
      else:
        pb_ref.target_fixup_kind = CFG_pb2.DataReference.Absolute
  finally:
    # Always balance DEBUG_PUSH, even when a read or lookup above raises.
    DEBUG_POP()
Example #5
0
def add_xref(bv, pb_inst, target, mask, optype):
  """ Record a reference to `target` on `pb_inst` and describe it

  Args:
    bv (binja.BinaryView)
    pb_inst: message whose `xrefs` collection receives the new entry
    target (int): referenced address
    mask: stored as the entry's mask; left unset when falsy
    optype: operand type of the reference; must be a key of
      `_CFG_INST_XREF_TYPE_TO_NAME`

  Returns:
    str: debug text of the form
      "(<code|data> <operand type> <internal|external> <target>[ & <mask>] <symbol>)"
  """
  entry = pb_inst.xrefs.add()
  entry.ea = target
  entry.operand_type = optype

  mask_text = ""
  if mask:
    entry.mask = mask
    mask_text = " & {:x}".format(mask)

  # Only name the entry when a symbol was actually found.
  symbol = util.find_symbol_name(bv, target)
  if len(symbol) > 0:
    entry.name = symbol

  points_at_code = util.is_code(bv, target)
  entry.target_type = (CFG_pb2.CodeReference.CodeTarget if points_at_code
                       else CFG_pb2.CodeReference.DataTarget)
  kind_text = "code" if points_at_code else "data"

  is_external = util.is_external_ref(bv, target)
  entry.location = (CFG_pb2.CodeReference.External if is_external
                    else CFG_pb2.CodeReference.Internal)
  where_text = "external" if is_external else "internal"

  # A target that is itself a function gets queued for recovery.
  if bv.get_function_at(target) is not None:
    queue_func(target)

  op_text = _CFG_INST_XREF_TYPE_TO_NAME[optype]
  description = "({} {} {} {:x}{} {})".format(
      kind_text, op_text, where_text, target, mask_text, symbol)
  return description
Example #6
0
def add_xref(bv, pb_inst, target, mask, optype):
    """ Record a reference to `target` on `pb_inst` and describe it

    Args:
      bv (binja.BinaryView)
      pb_inst: message whose `xrefs` collection receives the new entry
      target (int): referenced address
      mask: stored as the entry's mask; left unset when falsy
      optype: operand type of the reference; must be a key of
        `_CFG_INST_XREF_TYPE_TO_NAME`

    Returns:
      str: debug text of the form
        "(<code|data> <operand type> <internal|external> <target>[ & <mask>] <symbol>)"
    """
    entry = pb_inst.xrefs.add()
    entry.ea = target
    entry.operand_type = optype

    mask_text = ""
    if mask:
        entry.mask = mask
        mask_text = " & {:x}".format(mask)

    # Only name the entry when a symbol was actually found.
    symbol = util.find_symbol_name(bv, target)
    if len(symbol) > 0:
        entry.name = symbol

    points_at_code = util.is_code(bv, target)
    entry.target_type = (CFG_pb2.CodeReference.CodeTarget if points_at_code
                         else CFG_pb2.CodeReference.DataTarget)
    kind_text = "code" if points_at_code else "data"

    is_external = util.is_external_ref(bv, target)
    entry.location = (CFG_pb2.CodeReference.External if is_external
                      else CFG_pb2.CodeReference.Internal)
    where_text = "external" if is_external else "internal"

    # A target that is itself a function gets queued for recovery.
    if bv.get_function_at(target) is not None:
        queue_func(target)

    op_text = _CFG_INST_XREF_TYPE_TO_NAME[optype]
    description = "({} {} {} {:x}{} {})".format(kind_text, op_text,
                                                where_text, target, mask_text,
                                                symbol)
    return description