Example #1
0
def get_basic_block_rule(bb):
    '''
    Create and format a YARA rule for a single basic block.

    The following bytes are ignored:
        - relocation (fixup) bytes: replaced by the wildcard '??'
        - the last instruction, if it is a jump: dropped entirely
        - direct memory references / immediate values and other ignorable
          operand data (operand types listed in `masked_types`): replaced
          by the wildcard '??'

    Args:
        bb: basic block object exposing `va` (start address) and `size`
            (length in bytes).

    Returns:
        A `Rule` built from:
            - the name '$0x<va>',
            - the list of raw byte values (ints) of the kept instructions,
            - the list of formatted bytes ('??' or two upper-case hex digits),
            - the concatenation of the raw bytes that were NOT masked
              (a byte string: `str` on Python 2, `bytes` on Python 3).
        Returns None if the bytes of any instruction cannot be read.
    '''
    # fetch the instruction start addresses
    insns = []
    va = bb.va
    block_end = bb.va + bb.size
    while va < block_end:
        insns.append(va)
        va = next_head(va)

    # drop the last instruction if it's a jump
    if insns and is_jump(insns[-1]):
        insns = insns[:-1]

    # operand types whose encoded bytes are masked out. This is loop
    # invariant, so build it once rather than once per instruction.
    # (an earlier variant of this list did not include o_displ)
    masked_types = (o_mem, o_imm, o_displ, o_near, o_far)

    raw_bytes = []
    # `masked_bytes` is the list of formatted bytes,
    #   not yet join'd for performance.
    masked_bytes = []
    # unmasked raw bytes; accumulated in a bytearray instead of repeated
    # string concatenation, and converted to a byte string once at the end.
    kept_bytes = bytearray()

    for va in insns:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, va)
        op1 = insn.Op1
        op2 = insn.Op2

        fixup_byte_addrs = set()
        if idaapi.contains_fixups(
                va, size
        ):  # not work for x64 binaries? (e.g., idaapi.contains_fixups(here(), 0x2d000) -> False)
            logging.debug('ea = %#x, fixups', va)
            # fetch the fixup locations and sizes within this one instruction.
            fixups = []
            fva, fsize = get_fixup_va_and_size(va)
            fixups.append((fva, fsize))
            fva += fsize
            while fva < va + size:
                fva, fsize = get_fixup_va_and_size(
                    fva - 1)  # to detect consecutive fixups
                fixups.append((fva, fsize))
                fva += fsize
            logging.debug('fixups: %s', fixups)
            # compute the addresses of each component byte.
            for fva, fsize in fixups:
                fixup_byte_addrs.update(range(fva, fva + fsize))

        # fetch and format each byte of the instruction,
        #  possibly masking it into an unknown byte if it's a fixup or one
        #  of several operand types like direct mem ref.
        data = get_bytes(va, size)
        if data is None:
            return None

        # the operand properties do not change while walking the bytes of
        # one instruction, so read them once.
        op1_masked = op1.type in masked_types
        op2_masked = op2.type in masked_types
        op1_off = op1.offb
        op2_off = op2.offb

        # iterating a bytearray yields ints on both Python 2 and Python 3,
        # whereas calling ord() on the items of a Python 3 `bytes` object
        # raises TypeError.
        for i, value in enumerate(bytearray(data)):
            raw_bytes.append(value)
            byte_addr = i + va
            if byte_addr in fixup_byte_addrs:
                logging.debug('%#x: fixup byte (masked)', byte_addr)
                masked_bytes.append('??')
            elif op1_masked and i >= op1_off and (
                    i < op2_off or op2_off == 0):
                # byte lies at/after operand 1 and before operand 2
                # (or operand 2 has no recorded offset)
                logging.debug('%#x: Op1 masked byte', byte_addr)
                masked_bytes.append('??')
            elif op2_masked and i >= op2_off:
                logging.debug('%#x: Op2 masked byte', byte_addr)
                masked_bytes.append('??')
            else:
                masked_bytes.append('%02X' % value)
                kept_bytes.append(value)

    return Rule('$0x%x' % (bb.va), raw_bytes, masked_bytes, bytes(kept_bytes))
Example #2
0
def get_basic_block_rule(bb):
    '''
    Create and format a YARA rule for a single basic block.

    - relocation (fixup) bytes are masked into unknown bytes ('??').
    - instructions whose mnemonic contains 'call' (and that contain no
      fixups) are masked entirely.
    - the final instruction is not included if it is a jump.

    Args:
        bb: basic block object exposing `va` (start address) and `size`
            (length in bytes).

    Returns:
        A `Rule` built from the name '$0x<va>', the list of raw bytes and
        the list of formatted bytes ('??' or two upper-case hex digits).
        An empty block yields a rule with two empty lists.
    '''
    # fetch the instruction start addresses
    insns = []
    va = bb.va
    while va < bb.va + bb.size:
        insns.append(va)
        va = idc.next_head(va)

    # drop the last instruction if it's a jump.
    # `insns` is empty for a zero-sized block, so guard the index.
    if insns and is_jump(insns[-1]):
        insns = insns[:-1]

    # named `raw_bytes` rather than `bytes` to avoid shadowing the builtin.
    raw_bytes = []
    # `masked_bytes` is the list of formatted bytes,
    #   not yet join'd for performance.
    masked_bytes = []
    for va in insns:
        size = idc.get_item_size(va)
        if idaapi.contains_fixups(va, size):
            # fetch the fixup locations within this one instruction.
            fixups = []
            fixupva = idaapi.get_next_fixup_ea(va)
            fixups.append(fixupva)
            # TODO: assume the fixup size is four bytes, probably bad.
            fixupva += 4

            while fixupva < va + size:
                # search from one byte before the end of the previous fixup
                # so that a fixup starting exactly at `fixupva` (i.e. a
                # consecutive fixup) is found as well; this assumes
                # get_next_fixup_ea only returns addresses strictly greater
                # than its argument.
                fixupva = idaapi.get_next_fixup_ea(fixupva - 1)
                fixups.append(fixupva)
                # TODO: assume the fixup size is four bytes, probably bad.
                fixupva += 4

            # assume each fixup is four bytes (TODO!),
            #  and compute the addresses of each component byte.
            fixup_byte_addrs = set()
            for fixup in fixups:
                fixup_byte_addrs.update(range(fixup, fixup + 4))

            # fetch and format each byte of the instruction,
            #  possibly masking it into an unknown byte if it's a fixup.
            for i, byte in enumerate(idc.get_bytes(va, size)):
                raw_bytes.append(byte)
                if i + va in fixup_byte_addrs:
                    masked_bytes.append('??')
                else:
                    masked_bytes.append('%02X' % (byte))
        elif 'call' in idc.print_insn_mnem(va):
            # call instructions are wildcarded completely.
            for byte in idc.get_bytes(va, size):
                raw_bytes.append(byte)
                masked_bytes.append('??')
        else:
            for byte in idc.get_bytes(va, size):
                raw_bytes.append(byte)
                masked_bytes.append('%02X' % (byte))

    return Rule('$0x%x' % (bb.va), raw_bytes, masked_bytes)