def get_basic_block_rule(bb):
    '''
    create and format a YARA rule for a single basic block.

    The following bytes are ignored (emitted as the `??` wildcard, or
    dropped entirely in the case of the trailing jump):
      - relocation (fixup) bytes
      - the last instruction of the block, if it is a jump
      - operand bytes of direct memory references, immediate values,
        displacements and near/far addresses

    args:
      bb: basic block object; only its `va` (start address) and `size`
        (length in bytes) attributes are read.

    returns:
      `Rule('$0x<va>', raw, masked, kept)` where `raw` is the list of byte
      values of the retained instructions, `masked` is the parallel list of
      two-digit hex strings (or `??` for ignored bytes) and `kept` holds
      only the bytes that were not masked (the original variable name
      suggests it feeds a hash). returns None as soon as the bytes of an
      instruction cannot be read.
    '''
    # NOTE(review): this file contains another definition of
    # get_basic_block_rule further down; as written, the later one replaces
    # this one when the module is loaded -- confirm which is intended.

    # operand types whose encoded bytes are replaced by wildcards.
    masked_types = (o_mem, o_imm, o_displ, o_near, o_far)

    # fetch the instruction start addresses
    insns = []
    va = bb.va
    while va < bb.va + bb.size:
        insns.append(va)
        va = next_head(va)

    # drop the last instruction if its a jump
    if insns and is_jump(insns[-1]):
        insns = insns[:-1]

    _bytes = []
    # `masked_bytes` is the list of formatted bytes,
    #   not yet join'd for performance.
    masked_bytes = []
    # unmasked bytes are collected in a bytearray and converted once at the
    #   end, instead of growing a string one byte at a time.
    kept_bytes = bytearray()

    for va in insns:
        insn = ida_ua.insn_t()
        size = ida_ua.decode_insn(insn, va)
        op1 = insn.Op1
        op2 = insn.Op2

        fixup_byte_addrs = set()
        # not work for x64 binaries?
        #   (e.g., idaapi.contains_fixups(here(), 0x2d000) -> False)
        if idaapi.contains_fixups(va, size):
            logging.debug('ea = %#x, fixups', va)
            # fetch the fixup locations and sizes within this one instruction.
            fva, fsize = get_fixup_va_and_size(va)
            fixups = [(fva, fsize)]
            fva += fsize
            while fva < va + size:
                # search from one byte earlier to detect consecutive fixups
                fva, fsize = get_fixup_va_and_size(fva - 1)
                fixups.append((fva, fsize))
                fva += fsize
            logging.debug('fixups: %s', fixups)
            # compute the addresses of each component byte.
            for fva, fsize in fixups:
                fixup_byte_addrs.update(range(fva, fva + fsize))

        # fetch and format each byte of the instruction, possibly masking it
        #   into an unknown byte if its a fixup or one of the masked operand
        #   types (like a direct mem ref).
        bytes_ = get_bytes(va, size)
        if bytes_ is None:
            return None

        # iterating a bytearray yields integers for both a Python 2 `str`
        #   and a Python 3 `bytes`, so no `ord()` call is needed.
        for i, value in enumerate(bytearray(bytes_)):
            _bytes.append(value)
            byte_addr = i + va
            if byte_addr in fixup_byte_addrs:
                logging.debug('%#x: fixup byte (masked)', byte_addr)
                masked_bytes.append('??')
            elif (op1.type in masked_types and i >= op1.offb
                  and (i < op2.offb or op2.offb == 0)):
                # Op1 bytes run from its offset up to where Op2 starts;
                #   an Op2 offset of 0 is treated as "no second operand".
                logging.debug('%#x: Op1 masked byte', byte_addr)
                masked_bytes.append('??')
            elif op2.type in masked_types and i >= op2.offb:
                logging.debug('%#x: Op2 masked byte', byte_addr)
                masked_bytes.append('??')
            else:
                masked_bytes.append('%02X' % value)
                kept_bytes.append(value)

    # `bytes(...)` gives a `str` on Python 2 and `bytes` on Python 3.
    cut_bytes_for_hash = bytes(kept_bytes)
    return Rule('$0x%x' % (bb.va), _bytes, masked_bytes, cut_bytes_for_hash)
def get_basic_block_rule(bb):
    '''
    create and format a YARA rule for a single basic block.
    mask relocation bytes into unknown bytes (like '??').
    mask every byte of an instruction that has no fixups and whose
    mnemonic contains 'call'.
    do not include final instructions if they are jumps.

    args:
      bb: basic block object; only its `va` (start address) and `size`
        (length in bytes) attributes are read.

    returns:
      `Rule('$0x<va>', raw, masked)` where `raw` is the list of byte values
      of the retained instructions and `masked` is the parallel list of
      two-digit hex strings, or '??' for masked bytes.
    '''
    # NOTE(review): this file contains another definition of
    # get_basic_block_rule further up; as written, this later one replaces
    # it when the module is loaded -- confirm which is intended.

    # fetch the instruction start addresses
    insns = []
    va = bb.va
    while va < bb.va + bb.size:
        insns.append(va)
        va = idc.next_head(va)

    # drop the last instruction if its a jump.
    # an empty block has no last instruction, so check before indexing.
    if insns and is_jump(insns[-1]):
        insns = insns[:-1]

    raw_bytes = []
    # `masked_bytes` is the list of formatted bytes,
    #   not yet join'd for performance.
    masked_bytes = []
    for va in insns:
        size = idc.get_item_size(va)

        fixup_byte_addrs = set()
        mask_all = False
        if idaapi.contains_fixups(va, size):
            # fetch the fixup locations within this one instruction.
            # NOTE(review): get_next_fixup_ea searches strictly after the
            #   given address, so a fixup located exactly at `va` would
            #   presumably not be found by this first lookup -- confirm.
            fixupva = idaapi.get_next_fixup_ea(va)
            fixups = [fixupva]
            # TODO: assume the fixup size is four bytes, probably bad.
            fixupva += 4

            while fixupva < va + size:
                # search from one byte earlier so that a fixup starting
                #   right after the previous one is not skipped.
                fixupva = idaapi.get_next_fixup_ea(fixupva - 1)
                fixups.append(fixupva)
                # TODO: assume the fixup size is four bytes, probably bad.
                fixupva += 4

            # assume each fixup is four bytes (TODO!),
            #   and compute the addresses of each component byte.
            for fixup in fixups:
                fixup_byte_addrs.update(range(fixup, fixup + 4))
        elif 'call' in idc.print_insn_mnem(va):
            # no fixups, but a call: wildcard the whole instruction.
            mask_all = True

        # fetch and format each byte of the instruction,
        #  possibly masking it into an unknown byte.
        for i, byte in enumerate(idc.get_bytes(va, size)):
            raw_bytes.append(byte)
            if mask_all or (va + i) in fixup_byte_addrs:
                masked_bytes.append('??')
            else:
                masked_bytes.append('%02X' % (byte))

    return Rule('$0x%x' % (bb.va), raw_bytes, masked_bytes)