Esempio n. 1
0
class RISCV64(RISCV):
    """RISC-V variant with 8-byte addresses derived from ``RISCV``."""

    name = "riscv64"

    address_size = 8
    default_int_size = 4

    # Decoder and lifter are instantiated for the 8-byte address size.
    disassembler = RVDisassembler(address_size)
    lifter = Lifter(address_size)

    # Reuse the parent's register names: every register declared with a size
    # of 4 there is widened to 8 here, all other sizes are carried over as-is.
    regs = {
        reg_name: RegisterInfo(reg_name, 8 if info.size == 4 else info.size)
        for reg_name, info in RISCV.regs.items()
    }
    def read_register(self, reg_name: str) -> int:
        """Return the emulated value of the register ``reg_name``.

        Values are read from the ``"emulator.registers"`` entry of
        ``self.view.session_data``; a register without a stored value reads
        as 0. Names starting with ``"temp"`` are treated as temporaries of
        ``self.view.address_size`` bytes. For a sub-register, the value is
        extracted from its full-width register using the ``offset`` and
        ``size`` (both in bytes) of its ``RegisterInfo``.

        Args:
            reg_name: name of the register (or temporary) to read.

        Returns:
            The register value as an integer.

        Raises:
            UninitializedRegisterError: if ``reg_name`` is not a temporary and
                is not present in ``self.view.arch.regs``.
        """
        regs = dict(self.view.session_data.get("emulator.registers", {}))

        if reg_name.startswith('temp'):
            register = RegisterInfo(reg_name, self.view.address_size)
        else:
            register = self.view.arch.regs.get(reg_name)

        if register is None:
            # Report the offending name: `register` is always None on this
            # path, so passing it would make the error useless.
            raise UninitializedRegisterError(reg_name)

        full_width_reg = register.full_width_reg

        if reg_name == full_width_reg:
            return regs.get(reg_name, 0)

        # Select the `size` bytes that start `offset` bytes into the
        # full-width register.
        shift = register.offset * 8
        field_mask = (1 << (register.size * 8)) - 1

        return (regs.get(full_width_reg, 0) >> shift) & field_mask
Esempio n. 3
0
class RISCV64(RISCV):
    """RISC-V variant with 8-byte addresses and 8-byte default integers."""

    name = "riscv64"

    address_size = 8
    default_int_size = 8
    max_instr_length = 4

    endianness = Endianness.LittleEndian
    disassembler = RVDisassembler(address_size)
    lifter = Lifter(address_size)

    # Every register name inherited from RISCV is redeclared with a size of 8.
    regs = {reg_name: RegisterInfo(reg_name, 8) for reg_name in RISCV.regs}
Esempio n. 4
0
class RISCV(Architecture):
    """RISC-V architecture description with 4-byte addresses.

    Declares the sizes, endianness, disassembler/lifter instances and the
    register table used by the instruction hooks of this class.
    """

    name = "riscv"

    address_size = 4
    default_int_size = 4
    # TODO: This actually depends on whether the F, D, Q extension is
    # implemented, but we'll just assume it is the Q extension (128 bit)
    default_float_size = 16

    # TODO: not sure this is true for all extensions?
    max_instr_length = 4

    endianness = Endianness.LittleEndian

    disassembler = RVDisassembler(address_size)
    lifter = Lifter(address_size)

    # we are using the ABI names here, as those are also the register names
    # returned by capstone.
    regs = {
        # x0 - hard-wired zero
        "zero": RegisterInfo("zero", address_size),
        # x1 - return address (caller saved)
        "ra": RegisterInfo("ra", address_size),
        # x2 - stack pointer (callee saved)
        "sp": RegisterInfo("sp", address_size),
        # x3 - global pointer
        "gp": RegisterInfo("gp", address_size),
        # x4 - thread pointer
        "tp": RegisterInfo("tp", address_size),
        # x5-7 - temporaries (caller saved)
        "t0": RegisterInfo("t0", address_size),
        "t1": RegisterInfo("t1", address_size),
        "t2": RegisterInfo("t2", address_size),
        # x8 - saved register / frame pointer (callee saved)
        "s0": RegisterInfo("s0", address_size),
        # x9 - saved register (callee saved)
        "s1": RegisterInfo("s1", address_size),
        # x10-x11 - first function argument and return value (caller saved)
        "a0": RegisterInfo("a0", address_size),
        "a1": RegisterInfo("a1", address_size),
        # x12-17 - function arguments (caller saved)
        "a2": RegisterInfo("a2", address_size),
        "a3": RegisterInfo("a3", address_size),
        "a4": RegisterInfo("a4", address_size),
        "a5": RegisterInfo("a5", address_size),
        "a6": RegisterInfo("a6", address_size),
        "a7": RegisterInfo("a7", address_size),
        # x18-27 - saved registers (callee saved)
        "s2": RegisterInfo("s2", address_size),
        "s3": RegisterInfo("s3", address_size),
        "s4": RegisterInfo("s4", address_size),
        "s5": RegisterInfo("s5", address_size),
        "s6": RegisterInfo("s6", address_size),
        "s7": RegisterInfo("s7", address_size),
        "s8": RegisterInfo("s8", address_size),
        "s9": RegisterInfo("s9", address_size),
        "s10": RegisterInfo("s10", address_size),
        "s11": RegisterInfo("s11", address_size),
        # x28-31 - temporaries (caller saved)
        "t3": RegisterInfo("t3", address_size),
        "t4": RegisterInfo("t4", address_size),
        "t5": RegisterInfo("t5", address_size),
        "t6": RegisterInfo("t6", address_size),
        # pc - program counter (not one of the x0-x31 registers)
        "pc": RegisterInfo("pc", address_size),

        # f0-7 - FP temporaries (caller saved)
        "ft0": RegisterInfo("ft0", default_float_size),
        "ft1": RegisterInfo("ft1", default_float_size),
        "ft2": RegisterInfo("ft2", default_float_size),
        "ft3": RegisterInfo("ft3", default_float_size),
        "ft4": RegisterInfo("ft4", default_float_size),
        "ft5": RegisterInfo("ft5", default_float_size),
        "ft6": RegisterInfo("ft6", default_float_size),
        "ft7": RegisterInfo("ft7", default_float_size),
        # f8-9 - FP saved registers (callee saved)
        "fs0": RegisterInfo("fs0", default_float_size),
        "fs1": RegisterInfo("fs1", default_float_size),
        # f10-11 - FP arguments/return values (caller saved)
        "fa0": RegisterInfo("fa0", default_float_size),
        "fa1": RegisterInfo("fa1", default_float_size),
        # f12-17 - FP arguments (caller saved)
        "fa2": RegisterInfo("fa2", default_float_size),
        "fa3": RegisterInfo("fa3", default_float_size),
        "fa4": RegisterInfo("fa4", default_float_size),
        "fa5": RegisterInfo("fa5", default_float_size),
        "fa6": RegisterInfo("fa6", default_float_size),
        "fa7": RegisterInfo("fa7", default_float_size),
        # f18-27 - FP saved registers (callee saved)
        "fs2": RegisterInfo("fs2", default_float_size),
        "fs3": RegisterInfo("fs3", default_float_size),
        "fs4": RegisterInfo("fs4", default_float_size),
        "fs5": RegisterInfo("fs5", default_float_size),
        "fs6": RegisterInfo("fs6", default_float_size),
        "fs7": RegisterInfo("fs7", default_float_size),
        "fs8": RegisterInfo("fs8", default_float_size),
        "fs9": RegisterInfo("fs9", default_float_size),
        "fs10": RegisterInfo("fs10", default_float_size),
        "fs11": RegisterInfo("fs11", default_float_size),
        # f28-31 - FP temporaries (caller saved)
        "ft8": RegisterInfo("ft8", default_float_size),
        "ft9": RegisterInfo("ft9", default_float_size),
        "ft10": RegisterInfo("ft10", default_float_size),
        "ft11": RegisterInfo("ft11", default_float_size),
    }

    stack_pointer = "sp"

    def get_instruction_info(self, data, addr):
        """Describe the length and control-flow effect of one instruction.

        Decodes ``data`` at ``addr`` with ``self.disassembler`` and returns
        an ``InstructionInfo`` whose length is the decoded size, with branch
        entries added according to the mnemonic. Returns None when the
        disassembler cannot decode the bytes.
        """
        decoded = self.disassembler.decode(data, addr)
        if decoded is None:
            return None

        info = InstructionInfo()
        info.length = decoded.size

        # Target relative to this instruction; None when there is no immediate.
        target = None if decoded.imm is None else addr + decoded.imm

        mnemonic = decoded.name
        if mnemonic == 'ret' or self._looks_like_ret(decoded):
            info.add_branch(BranchType.FunctionReturn)
        elif mnemonic in branch_ins:
            # Conditional branch: taken target plus fall-through.
            info.add_branch(BranchType.TrueBranch, target)
            info.add_branch(BranchType.FalseBranch, addr + decoded.size)
        elif mnemonic in direct_jump_ins:
            info.add_branch(BranchType.UnconditionalBranch, target)
        elif mnemonic in indirect_jump_ins:
            info.add_branch(BranchType.UnresolvedBranch)
        elif mnemonic in direct_call_ins:
            info.add_branch(BranchType.CallDestination, target)
        elif mnemonic in indirect_call_ins:
            info.add_branch(BranchType.UnresolvedBranch)

        return info

    def _looks_like_ret(self, instr):
        """
        Check for jump instruction that look like functions returns.
        """
        # any register jump to 'ra' the return address register, is probably a
        # function return.

        if (instr.name == 'jalr' and instr.operands[0] == 'zero'
                and instr.operands[1] == 'ra' and not instr.imm):
            # if jalr does not link into zero, then something weird
            # is going on and we don't want to mark this as a return.
            # similarly if a offset is added (via imm) to the ra register,
            # then this also doesn't look like a function return.
            return True
        elif (instr.name == 'jr' and instr.operands[0] == 'ra'
              and not instr.imm):
            return True

        return False

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, size)`` for the instruction at ``addr``.

        The tokens come from ``gen_token`` applied to the decoded
        instruction. Returns None when the disassembler cannot decode
        ``data``.
        """
        decoded = self.disassembler.decode(data, addr)
        if decoded is None:
            return None

        return gen_token(decoded), decoded.size

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction at ``addr`` into ``il``.

        Hands the decoded instruction and its mnemonic to ``self.lifter`` and
        returns the instruction size, or None when decoding fails.
        """
        decoded = self.disassembler.decode(data, addr)
        if decoded is not None:
            self.lifter.lift(il, decoded, decoded.name)
            return decoded.size

        return None
Esempio n. 5
0
class Moxie(Architecture):
    """Architecture description for "moxie": sizes, registers and decoding."""

    name = "moxie"
    address_size = 4
    default_int_size = 4

    stack_pointer = 'sp'

    # 'sp', 'fp' and 'r0'-'r13', each declared with a size of 2.
    regs = {
        reg: RegisterInfo(reg, 2)
        for reg in (
            'sp', 'fp',
            'r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7',
            'r8', 'r9', 'r10', 'r11', 'r12', 'r13',
        )
    }

    def get_operands(self, instr, word):
        """Return the ``(src, dst)`` register operands encoded in ``word``.

        The high nibble of the low byte selects the destination and the low
        nibble the source. Instructions listed in ``NO_IMM_INSTRUCTIONS``, or
        in neither register class, yield ``(None, None)``.
        """
        if instr in NO_IMM_INSTRUCTIONS:
            return None, None

        high_nibble = (word >> 4) & 0xF
        low_nibble = word & 0xF

        if instr in ONE_REG_INSTRUCTIONS:
            return None, REGISTERS[high_nibble]

        if instr in TWO_REG_INSTRUCTIONS:
            dst_reg = REGISTERS[high_nibble]
            src_reg = REGISTERS[low_nibble]
            return src_reg, dst_reg

        return None, None

    def decode_instruction(self, data, addr):
        """Decode the instruction at the start of ``data``.

        Args:
            data: raw bytes beginning at the instruction to decode.
            addr: address of the instruction; used for branch targets and
                error logging.

        Returns:
            The 8-tuple ``(instr, src, src_op, dst, dst_op, src_value,
            dst_value, length)`` where ``length`` is in bytes. ``instr`` is
            None when nothing could be decoded: ``data`` is shorter than the
            instruction (including its trailing immediate), the branch opcode
            is unknown, or the top two bits of the first word are ``0b01``.
        """
        instr = None
        length = 2
        extra = None
        src_value, dst_value = None, None
        dst_op, src_op = DEFAULT_MODE, DEFAULT_MODE
        src, dst = None, None

        if len(data) < 2:
            return instr, src, src_op, dst, dst_op, src_value, dst_value, length

        word = struct.unpack('>H', data[:2])[0]
        opcode_type = word >> 14

        if opcode_type == 0b11:  # is branch
            branch_type = (word & 0x3c00) >> 10
            if branch_type < len(BRANCH_INSTRUCTIONS):
                instr = BRANCH_INSTRUCTIONS[branch_type]
            else:
                log_error('[%x] Bad branch opcode: %x' % (addr, branch_type))
                return instr, src, src_op, dst, dst_op, src_value, dst_value, length

            # NOTE(review): the 10-bit offset is used unsigned and relative to
            # this instruction's own address - confirm against the ISA manual.
            branch_offset = word & 0x3ff
            dst_value = (branch_offset << 1) + addr
            src_op = EMPTY_MODE
            dst_op = IMM_ADDRESS_MODE

        elif opcode_type == 0b10:
            instr = SPECIAL_INSTRUCTIONS[(word >> 12) & 0x3]

            # The register index is bits 8-11. The mask must be parenthesised:
            # `>>` binds tighter than `&`, so `word & 0xf00 >> 8` is really
            # `word & 0xf` and would read the wrong nibble.
            dst = REGISTERS[(word & 0xf00) >> 8]
            dst_op = REGISTER_MODE

            src_value = word & 0xff
            src_op = IMM_INTEGER_MODE

        elif opcode_type == 0b00:
            opcode = word >> 8
            instr = INSTRUCTIONS[opcode]
            src, dst = self.get_operands(instr, word)

            # Some instructions are followed by a 16- or 32-bit immediate.
            if instr in IMM_INSTRUCTION_16:
                imm_size, imm_format = 2, '>H'
            elif instr in IMM_INSTRUCTION_32:
                imm_size, imm_format = 4, '>I'
            else:
                imm_size, imm_format = 0, None

            if imm_size:
                if len(data) < 2 + imm_size:
                    # Truncated immediate: report "not decodable" instead of
                    # letting struct.error escape to the caller.
                    return (None, None, DEFAULT_MODE, None, DEFAULT_MODE,
                            None, None, 2)
                extra = struct.unpack(imm_format, data[2:2 + imm_size])[0]
                length += imm_size

            # Compare against None, not truthiness: an immediate of 0 is a
            # real operand and must not be dropped.
            has_immediate = extra is not None

            if instr in ONE_REG_INSTRUCTIONS:
                dst_op = REGISTER_MODE
                src_op = EMPTY_MODE
                if has_immediate:
                    src_value = extra
                    src_op = IMM_INTEGER_MODE
            elif instr in TWO_REG_INSTRUCTIONS:
                src_op = REGISTER_MODE
                dst_op = REGISTER_MODE

            elif instr in NO_IMM_INSTRUCTIONS and has_immediate:
                src = None
                dst = None
                src_op = EMPTY_MODE
                dst_op = IMM_INTEGER_MODE
                dst_value = extra
            else:
                src_op = EMPTY_MODE
                dst_op = EMPTY_MODE

        return instr, src, src_op, dst, dst_op, src_value, dst_value, length

    def perform_get_instruction_info(self, data, addr):
        """Describe the length and control-flow effect of one instruction.

        Returns an ``InstructionInfo`` carrying the decoded length and any
        branch entries, or None when ``decode_instruction`` could not decode
        ``data`` (matching the other ``perform_*`` hooks of this class).
        """
        (instr, _src, _src_op, _dst, _dst_op, _src_value, dst_value,
         length) = self.decode_instruction(data, addr)

        if instr is None:
            return None

        res = InstructionInfo()
        res.length = length

        if instr == 'ret':
            res.add_branch(BranchType.FunctionReturn)
        elif instr in BRANCH_INSTRUCTIONS:
            res.add_branch(BranchType.TrueBranch, dst_value)
            # Fall-through is the next instruction, i.e. this address plus the
            # decoded length - not a fixed 16 bytes.
            res.add_branch(BranchType.FalseBranch, addr + length)
        elif instr == 'jsra':
            res.add_branch(BranchType.CallDestination, dst_value)
        elif instr == "jmpa":
            res.add_branch(BranchType.UnconditionalBranch, dst_value)

        return res

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at ``addr``.

        The mnemonic is padded to nine characters and followed by the
        destination operand, a ``', '`` separator when both operands are
        present, and the source operand. Returns None when decoding fails.
        """
        decoded = self.decode_instruction(data, addr)
        instr, src, src_op, dst, dst_op, src_value, dst_value, length = decoded

        if instr is None:
            return None

        has_dst = dst_op != EMPTY_MODE
        has_src = src_op != EMPTY_MODE

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 '{:9s}'.format(instr))
        ]

        # The destination is rendered (and emitted) first.
        if has_dst:
            tokens.extend(OperandTokens[dst_op](dst, dst_value))

        src_tokens = OperandTokens[src_op](src, src_value) if has_src else None

        if has_src and has_dst:
            tokens.append(
                InstructionTextToken(InstructionTextTokenType.TextToken, ', '))
        if src_tokens:
            tokens.extend(src_tokens)

        return tokens, length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Append an ``unimplemented`` IL expression for the instruction.

        Returns the decoded instruction length, or None when
        ``decode_instruction`` could not decode ``data``.
        """
        (instr, _src, _src_op, _dst, _dst_op, _src_value, _dst_value,
         length) = self.decode_instruction(data, addr)

        if instr is not None:
            il.append(il.unimplemented())
            return length

        return None
Esempio n. 6
0
class Clemency(Architecture):
    """Architecture description for "clemency": registers, flags, decoding."""

    name = "clemency"
    address_size = 4
    default_int_size = 4

    # Register setup: R0-R28 plus ST, RA and PC, each declared with size 4.
    regs = {
        reg: RegisterInfo(reg, 4)
        for reg in (
            'R0', 'R1', 'R2', 'R3', 'R4', 'R5', 'R6', 'R7',
            'R8', 'R9', 'R10', 'R11', 'R12', 'R13', 'R14', 'R15',
            'R16', 'R17', 'R18', 'R19', 'R20', 'R21', 'R22', 'R23',
            'R24', 'R25', 'R26', 'R27', 'R28',
            'ST', 'RA', 'PC',
        )
    }

    stack_pointer = 'ST'

    # Flag setup
    flags = ['s', 'o', 'c', 'z']

    flag_roles = {
        's': FlagRole.NegativeSignFlagRole,
        'o': FlagRole.OverflowFlagRole,
        'c': FlagRole.CarryFlagRole,
        'z': FlagRole.ZeroFlagRole,
    }

    flag_write_types = ['', '*']

    flags_written_by_flag_write_type = {'*': ['s', 'o', 'c', 'z']}

    flags_required_for_flag_condition = {
        # not equal
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        # equal
        LowLevelILFlagCondition.LLFC_E: ['z'],
        # unsigned less than
        LowLevelILFlagCondition.LLFC_ULT: ['c', 'z'],
        # unsigned less than or equal
        LowLevelILFlagCondition.LLFC_ULE: ['c', 'z'],
        # unsigned greater than
        LowLevelILFlagCondition.LLFC_UGT: ['c', 'z'],
        # unsigned greater than or equal
        LowLevelILFlagCondition.LLFC_UGE: ['c', 'z'],
        # signed less than
        LowLevelILFlagCondition.LLFC_SLT: ['s'],
        # signed less than or equal
        LowLevelILFlagCondition.LLFC_SLE: ['s', 'z'],
        # signed greater than
        LowLevelILFlagCondition.LLFC_SGT: ['s', 'z'],
        # signed greater than or equal
        LowLevelILFlagCondition.LLFC_SGE: ['s'],
        # negative
        LowLevelILFlagCondition.LLFC_NEG: ['s'],
        # positive
        LowLevelILFlagCondition.LLFC_POS: ['s'],
        # overflow
        LowLevelILFlagCondition.LLFC_O: ['o'],
        # no overflow
        LowLevelILFlagCondition.LLFC_NO: ['o'],
    }

    def perform_get_instruction_info(self, data, addr):
        """Describe the length and control-flow effect of one instruction.

        Disassembles ``data`` and returns an ``InstructionInfo`` whose length
        is ``reader.nytes_read() * 2``, with branch entries chosen by
        mnemonic. Relative targets are ``addr + 2 * op1.value`` and absolute
        targets ``2 * op1.value``. Returns None (after logging) when the
        bytes do not decode.
        """
        reader = BitReader16(BytestringReader(data))
        try:
            ins = disassemble(reader)
        except InvalidMachineCodeException as e:
            log_error("InvalidMachineCodeException at address: " + hex(addr) +
                      " {0}".format(e))
            return None

        info = InstructionInfo()
        info.length = reader.nytes_read() * 2

        # The conditional forms of each family share one set of suffixes.
        conditions = ('n', 'e', 'l', 'le', 'g', 'ge', 'no', 'o', 'ns', 's',
                      'sl', 'sle', 'sg', 'sge')
        relative_cond_branches = ['b' + cond for cond in conditions]
        indirect_cond_branches = ['br' + cond for cond in conditions]
        relative_cond_calls = ['c' + cond for cond in conditions]
        indirect_cond_calls = ['cr' + cond for cond in conditions]

        def relative_target():
            return addr + 2 * ins.op1.value

        def absolute_target():
            return 2 * ins.op1.value

        def add_fallthrough():
            info.add_branch(BranchType.FalseBranch, addr + info.length)

        op = ins.mnemonic
        if op in ('re', 'ht'):
            info.add_branch(BranchType.FunctionReturn)
        elif op in ('b', 'brr'):
            # relative direct unconditional
            info.add_branch(BranchType.UnconditionalBranch, relative_target())
        elif op in relative_cond_branches:
            # relative direct conditional
            info.add_branch(BranchType.TrueBranch, relative_target())
            add_fallthrough()
        elif op == 'br':
            # absolute indirect unconditional
            info.add_branch(BranchType.IndirectBranch)
        elif op in indirect_cond_branches:
            # absolute indirect conditional
            info.add_branch(BranchType.TrueBranch)
            add_fallthrough()
        elif op == 'bra':
            # absolute direct
            info.add_branch(BranchType.UnconditionalBranch, absolute_target())
        elif op in ('c', 'car'):
            # relative direct unconditional call
            info.add_branch(BranchType.CallDestination, relative_target())
        elif op in relative_cond_calls:
            # relative direct conditional call
            info.add_branch(BranchType.CallDestination, relative_target())
            add_fallthrough()
        elif op == 'caa':
            info.add_branch(BranchType.CallDestination, absolute_target())
        elif op == 'cr':
            info.add_branch(BranchType.CallDestination)
        elif op in indirect_cond_calls:
            info.add_branch(BranchType.CallDestination)
            add_fallthrough()

        return info

    def perform_get_instruction_text(self, data, addr):
        """Disassemble one instruction and render it as display tokens.

        Returns ``(tokens, length)`` where ``length`` is
        ``reader.nytes_read() * 2``, or None (after logging) when the bytes
        do not decode. A ``mov`` whose second operand value is a key of the
        module-level ``strings`` mapping gets that string appended as a
        trailing ``// ...`` token.
        """
        reader = BitReader16(BytestringReader(data))
        try:
            ins = disassemble(reader)
        except InvalidMachineCodeException as e:
            # `addr` is an integer: format it with hex() (as
            # perform_get_instruction_info does). Concatenating it directly to
            # a str raised TypeError and masked the real decode error.
            log_error("InvalidMachineCodeException at address: " + hex(addr) +
                      " {0}".format(e))
            return None

        def register(operand):
            return InstructionTextToken(
                InstructionTextTokenType.RegisterToken, str(operand.reg))

        def separator(text):
            return InstructionTextToken(
                InstructionTextTokenType.OperandSeparatorToken, text)

        def integer(text, value):
            return InstructionTextToken(
                InstructionTextTokenType.IntegerToken, text, value)

        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 ins.mnemonic),
            separator(" "),
        ]

        if ins.is_load_or_store():
            # Rendered as: op1, [op2 + 2*op3, op4]
            scaled_offset = 2 * ins.op3.value
            tokens += [
                register(ins.op1),
                separator(', ['),
                register(ins.op2),
                separator(' + '),
                integer(hex(scaled_offset), scaled_offset),
                separator(', '),
                integer(str(ins.op4.value), ins.op4.value),
                separator(']'),
            ]
        elif ins.is_direct_relative_jmp_or_call():
            target = addr + 2 * ins.op1.value
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.CodeRelativeAddressToken,
                    hex(target), target))
        elif ins.is_direct_jmp_or_call():
            target = 2 * ins.op1.value
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    hex(target), target))
        elif ins.mnemonic == 'mov':
            # The immediate is shown twice: as encoded and multiplied by 2.
            value = ins.op2.value
            tokens += [
                register(ins.op1),
                separator(', '),
                integer(hex(value), value),
                separator(' / '),
                integer(hex(2 * value), 2 * value),
            ]
        else:
            for i, op in enumerate(ins.operands):
                if op.is_reg():
                    tokens.append(register(op))
                elif op.is_imm():
                    tokens.append(integer(hex(op.value), op.value))
                if i + 1 != ins.arity:
                    tokens.append(separator(', '))

        # `strings` is only read here, so no `global` declaration is needed.
        if ins.mnemonic == 'mov' and ins.operands[1].value in strings:
            string = strings[ins.operands[1].value]
            tokens.append(
                InstructionTextToken(InstructionTextTokenType.StringToken,
                                     ' // {}'.format(string)))
        return tokens, reader.nytes_read() * 2

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Return the length of the instruction at ``addr`` without lifting it.

        No IL is appended to ``il`` yet (see TODO). Returns
        ``reader.nytes_read() * 2``, or None (after logging) when the bytes
        do not decode.
        """
        reader = BitReader16(BytestringReader(data))
        try:
            # Only the reader position is needed; the decoded instruction is
            # unused until lifting is implemented.
            disassemble(reader)
        except InvalidMachineCodeException as e:
            # `addr` is an integer: format it with hex() instead of
            # concatenating it to a str, which raised TypeError.
            log_error("InvalidMachineCodeException at address: " + hex(addr) +
                      " {0}".format(e))
            return None

        # TODO

        return reader.nytes_read() * 2
Esempio n. 7
0
class Spu(Architecture):
    """Architecture description for 'spu': registers, flags, decoding."""

    name = 'spu'
    address_size = 4
    default_int_size = 4
    max_instr_length = 4

    # One entry of size 16 for every name in the module-level `registers`.
    regs = {reg: RegisterInfo(reg, 16) for reg in registers}

    stack_pointer = 'sp'

    flags = ('c', 'z', 'i', 'd', 'b', 'v', 's')
    flag_write_types = ('*', 'czs', 'zvs', 'zs')
    flag_roles = {
        # Not a normal carry flag, subtract result is inverted
        'c': FlagRole.SpecialFlagRole,
        'z': FlagRole.ZeroFlagRole,
        'v': FlagRole.OverflowFlagRole,
        's': FlagRole.NegativeSignFlagRole,
    }

    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_UGE: ['c'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        LowLevelILFlagCondition.LLFC_NEG: ['s'],
        LowLevelILFlagCondition.LLFC_POS: ['s'],
    }

    flags_written_by_flag_write_type = {
        '*': ['c', 'z', 'v', 's'],
        'czs': ['c', 'z', 's'],
        'zvs': ['z', 'v', 's'],
        'zs': ['z', 's'],
    }

    # Table with 2048 slots, all initially empty.
    itable = [None] * 2048

    # Shared ', ' token placed between operands by the instruction classes.
    _comma_separator = InstructionTextToken(OperandSeparatorToken, ', ')

    def __init__(self, *args, **kwargs):
        """Initialise the base class, then run ``init_instructions``.

        All positional and keyword arguments are forwarded unchanged to the
        base-class constructor.
        """
        super(Spu, self).__init__(*args, **kwargs)
        self.init_instructions()

    def init_instructions(self):
        # Start idef classes
        class idef(object):
            """Base instruction definition; subclasses supply the behaviour."""

            def __init__(self, name):
                # Mnemonic used when rendering the instruction.
                self.name = name

            def decode(self, opcode, addr):
                """Decode the operands of ``opcode``; must be overridden."""
                raise NotImplementedError

            def get_text(self, opcode, addr):
                """Render ``opcode`` as tokens; must be overridden."""
                raise NotImplementedError

        class idef_RR(idef):
            """Instruction decoded with ``decode_RR`` into three registers."""

            def decode(self, opcode, addr):
                """Return ``ThreeRegisters`` holding the rb, ra, rt entries."""
                _, idx_b, idx_a, idx_t = decode_RR(opcode)
                return ThreeRegisters(registers[idx_b], registers[idx_a],
                                      registers[idx_t])

            def get_text(self, opcode, addr):
                """Render as the padded name followed by ``rt, ra, rb``."""
                reg_b, reg_a, reg_t = self.decode(opcode, addr)
                mnemonic = InstructionTextToken(TextToken,
                                                '{:10s}'.format(self.name))
                return (
                    mnemonic,
                    InstructionTextToken(RegisterToken, reg_t),
                    Spu._comma_separator,
                    InstructionTextToken(RegisterToken, reg_a),
                    Spu._comma_separator,
                    InstructionTextToken(RegisterToken, reg_b),
                )

        class idef_ROHROL(idef_RR):
            """RR-encoded instruction carrying a split 9-bit signed offset."""

            def decode(self, opcode, addr):
                """Return ``ImmediateRegister(target, register)``.

                The offset is assembled from the low two bits of the first
                RR field (high part) and the last RR field (low part),
                sign-extended from 9 bits, multiplied by 4 and added to
                ``addr``.
                """
                _, high_part, idx_a, low_part = decode_RR(opcode)
                # The prefetch bit (0x40 of the high field) is not handled.

                offset = ((high_part & 3) << 7) | low_part
                if offset & 0x100:
                    # Bit 8 is the sign bit of the 9-bit value.
                    offset -= 0x200

                return ImmediateRegister((offset << 2) + addr,
                                         registers[idx_a])

            def get_text(self, opcode, addr):
                """Render as the padded name, the address, then the register."""
                address, reg = self.decode(opcode, addr)
                mnemonic = InstructionTextToken(TextToken,
                                                '{:10s}'.format(self.name))
                return (
                    mnemonic,
                    InstructionTextToken(PossibleAddressToken,
                                         '{:#x}'.format(address), address),
                    Spu._comma_separator,
                    InstructionTextToken(RegisterToken, reg),
                )

        class idef_R(idef_RR):
            """RR-encoded instruction that shows ra (optionally) and rt."""

            def __init__(self, name, noRA=False):
                self.name = name
                # When true, the ra operand is left out of the rendered text.
                self.noRA = noRA

            def get_text(self, opcode, addr):
                """Render as the padded name, ``ra, `` unless noRA, then rt."""
                _, reg_a, reg_t = self.decode(opcode, addr)

                tokens = [
                    InstructionTextToken(TextToken, '{:10s}'.format(self.name))
                ]
                if not self.noRA:
                    tokens.append(InstructionTextToken(RegisterToken, reg_a))
                    tokens.append(Spu._comma_separator)

                tokens.append(InstructionTextToken(RegisterToken, reg_t))
                return tokens

        class idef_SPR(idef):
            """RR-encoded instruction whose sa index is biased by ``offset``."""

            def __init__(self, name, swap=False, offset=128):
                self.name = name
                # When true, the two register indices trade places.
                self.swap = swap
                # Added to the sa field before the `registers` lookup.
                self.offset = offset

            def decode(self, opcode, addr):
                """Return ``ImmediateTwoRegisters(iii, first, second)``."""
                _, immediate, idx_sa, idx_rt = decode_RR(opcode)
                first, second = idx_sa + self.offset, idx_rt
                if self.swap:
                    first, second = second, first

                return ImmediateTwoRegisters(immediate, registers[first],
                                             registers[second])

            def get_text(self, opcode, addr):
                """Render as the padded name, the second register, the first."""
                _, first_reg, second_reg = self.decode(opcode, addr)
                mnemonic = InstructionTextToken(TextToken,
                                                '{:10s}'.format(self.name))
                return (
                    mnemonic,
                    InstructionTextToken(RegisterToken, second_reg),
                    Spu._comma_separator,
                    InstructionTextToken(RegisterToken, first_reg),
                )

        class idef_CH(idef_SPR):
            """``idef_SPR`` variant with the register offset fixed at 256."""

            def __init__(self, name, swap=False):
                idef_SPR.__init__(self, name, swap=swap, offset=256)

        class idef_noops(idef):
            """Instruction rendered as its bare name, without operands."""

            def __init__(self, name, cbit=False):
                self.name = name
                # Stored only; nothing in this class reads `cbit` or `cf`.
                self.cbit = cbit
                self.cf = 0

            def decode(self, opcode, addr):
                """Run ``decode_RR`` on ``opcode`` and return None."""
                _op, _field1, _field2, _field3 = decode_RR(opcode)
                return None

            def get_text(self, opcode, addr):
                """Return a one-element tuple holding the padded name."""
                # TODO: To add false targets or not to add.. that is the question
                mnemonic = InstructionTextToken(TextToken,
                                                '{:10s}'.format(self.name))
                return (mnemonic,)

        class idef_RRR(idef):
            """Instruction decoded with ``decode_RRR`` into four registers."""

            def decode(self, opcode, addr):
                """Return ``FourRegisters`` holding the rt, rb, ra, rc entries."""
                _, idx_t, idx_b, idx_a, idx_c = decode_RRR(opcode)
                return FourRegisters(registers[idx_t], registers[idx_b],
                                     registers[idx_a], registers[idx_c])

            def get_text(self, opcode, addr):
                """Render as the padded name followed by ``rt, ra, rb, rc``."""
                reg_t, reg_b, reg_a, reg_c = self.decode(opcode, addr)
                mnemonic = InstructionTextToken(TextToken,
                                                '{:10s}'.format(self.name))
                return (
                    mnemonic,
                    InstructionTextToken(RegisterToken, reg_t),
                    Spu._comma_separator,
                    InstructionTextToken(RegisterToken, reg_a),
                    Spu._comma_separator,
                    InstructionTextToken(RegisterToken, reg_b),
                    Spu._comma_separator,
                    InstructionTextToken(RegisterToken, reg_c),
                )

        class idef_Branch(idef_RR):
            """RR-encoded branch whose ra/rb operands can be suppressed."""

            def __init__(self, name, no2=False, uncond=False):
                # NOTE(review): `uncond` is accepted but never stored or used.
                self.name = name
                # When true, only rt is rendered after the name.
                self.no2 = no2

            def get_text(self, opcode, addr):
                """Render as the padded name, ``ra, rb`` unless no2, then rt."""
                reg_b, reg_a, reg_t = self.decode(opcode, addr)

                tokens = [
                    InstructionTextToken(TextToken, '{:10s}'.format(self.name))
                ]
                if not self.no2:
                    tokens.append(InstructionTextToken(RegisterToken, reg_a))
                    tokens.append(Spu._comma_separator)
                    tokens.append(InstructionTextToken(RegisterToken, reg_b))

                # NOTE(review): no separator is emitted between rb and rt in
                # the two-operand form - confirm whether that is intended.
                tokens.append(InstructionTextToken(RegisterToken, reg_t))
                return tokens

        class idef_RI7(idef):
            """Instruction definition with a 7-bit immediate and two registers.

            When ``signed`` is true the immediate is sign-extended from bit 6.
            """

            def __init__(self, name, signed=True):
                self.signed = signed
                self.name = name

            def decode(self, opcode, addr):
                """Return ImmediateTwoRegisters(i7, ra, rt)."""
                _op, imm, a_idx, t_idx = decode_RI7(opcode)
                if self.signed:
                    # Bit 6 is the sign bit of the 7-bit field.
                    if imm & 0x40:
                        imm -= 0x80
                return ImmediateTwoRegisters(imm, registers[a_idx],
                                             registers[t_idx])

            def get_text(self, opcode, addr):
                """Return the tokens ``name rt, ra, imm`` as a tuple."""
                imm, src, dest = self.decode(opcode, addr)

                mnemonic = InstructionTextToken(TextToken,
                                                '{:10s}'.format(self.name))
                dest_token = InstructionTextToken(RegisterToken, dest)
                src_token = InstructionTextToken(RegisterToken, src)
                imm_token = InstructionTextToken(IntegerToken,
                                                 '{:#x}'.format(imm), imm)
                return (
                    mnemonic,
                    dest_token,
                    Spu._comma_separator,
                    src_token,
                    Spu._comma_separator,
                    imm_token,
                )

        class idef_RI8(idef):
            """Instruction definition with a biased 8-bit immediate.

            The immediate reported to callers is ``bias - i8``, where ``i8``
            is the raw field returned by ``decode_RI8``.
            """

            def __init__(self, name, bias):
                self.name = name
                self.bias = bias

            def decode(self, opcode, addr=None):
                """Return ImmediateTwoRegisters(bias - i8, ra, rt).

                ``addr`` is unused. It is accepted so that this class can be
                called as ``decode(opcode, addr)`` like every other
                instruction definition; without it that call raised
                TypeError. It defaults to None so existing one-argument
                calls keep working.
                """
                _op, i8, ra, rt = decode_RI8(opcode)
                i8 = self.bias - i8

                return ImmediateTwoRegisters(i8, registers[ra], registers[rt])

        class idef_RI7_ls(idef_RI7):
            # Marker subclass: nothing is overridden, so instances behave
            # exactly like idef_RI7.
            pass
            # A dedicated decode() was sketched here and left disabled. It
            # would have returned the raw decode_RI7 fields and sign-extended
            # the 7-bit displacement (subtract 0x80 when bit 0x40 is set).
            # idef_RI7.decode already applies that sign extension when
            # `signed` is true.

        class idef_RI10(idef):
            """Instruction definition with a 10-bit immediate and two registers.

            When ``signed`` is true the immediate is sign-extended from bit 9.
            """

            def __init__(self, name, signed=True):
                self.name = name
                self.signed = signed

            def decode(self, opcode, addr):
                """Return ImmediateTwoRegisters(i10, ra, rt)."""
                _op, i10, ra, rt = decode_RI10(opcode)
                # Bit 9 is the sign bit of the 10-bit field.
                if self.signed and i10 & 0x200:
                    i10 -= 0x400
                return ImmediateTwoRegisters(i10, registers[ra], registers[rt])

            def get_text(self, opcode, addr):
                """Return the token list ``name rt, ra[, imm]``.

                ``ori`` with a zero immediate is shown as ``lr rt, ra`` with
                the immediate omitted.
                """
                i10, ra, rt = self.decode(opcode, addr)

                name = self.name
                # Strings are compared by value: the previous identity test
                # (`is 'ori'`) only worked when the interpreter happened to
                # intern both strings.
                if i10 == 0 and name == 'ori':
                    name = 'lr'

                tokens = [
                    InstructionTextToken(TextToken, '{:10s}'.format(name)),
                    InstructionTextToken(RegisterToken, rt),
                    Spu._comma_separator,
                    InstructionTextToken(RegisterToken, ra)
                ]

                if name != 'lr':
                    tokens.extend(
                        (Spu._comma_separator,
                         InstructionTextToken(IntegerToken,
                                              '{:#x}'.format(i10), i10)))

                return tokens

        class idef_RI10_ls(idef_RI10):
            """RI10 variant whose immediate is scaled by 16 before use."""

            def decode(self, opcode, addr):
                """Return ImmediateTwoRegisters(i10 << 4, ra, rt), sign-extended."""
                _op, raw_imm, a_idx, t_idx = decode_RI10(opcode)
                offset = raw_imm << 4
                # After the shift the sign bit of the field is bit 13.
                if offset & 0x2000:
                    offset -= 0x4000
                return ImmediateTwoRegisters(offset, registers[a_idx],
                                             registers[t_idx])

        class idef_RI16(idef):
            """Instruction definition with a 16-bit immediate and one register.

            ``noRA`` suppresses the register operand in the text output and
            ``signext`` enables sign extension of the immediate. ``flags`` is
            accepted but not stored; ``isBranch`` is stored but not read by
            this class.
            """

            def __init__(self,
                         name,
                         flags=0,
                         noRA=False,
                         isBranch=True,
                         signext=False):
                self.signext = signext
                self.isBranch = isBranch
                self.noRA = noRA
                self.name = name

            def decode(self, opcode, addr):
                """Return ImmediateRegister(i16, rt)."""
                _op, imm, t_idx = decode_RI16(opcode)
                if self.signext:
                    # Bit 15 is the sign bit of the 16-bit field.
                    if imm & 0x8000:
                        imm -= 0x10000
                return ImmediateRegister(imm, registers[t_idx])

            def get_text(self, opcode, addr):
                """Return the token list ``name [rt, ]imm``."""
                imm, reg = self.decode(opcode, addr)

                tokens = [
                    InstructionTextToken(TextToken, '{:10s}'.format(self.name))
                ]
                if not self.noRA:
                    tokens.append(InstructionTextToken(RegisterToken, reg))
                    tokens.append(Spu._comma_separator)
                tokens.append(
                    InstructionTextToken(PossibleAddressToken,
                                         '{:#x}'.format(imm), imm))
                return tokens

        class idef_RI16_abs(idef_RI16):
            """RI16 variant whose immediate is multiplied by 4."""

            def decode(self, opcode, addr):
                """Return ImmediateRegister(i16 << 2, rt)."""
                imm, reg = idef_RI16.decode(self, opcode, addr)
                return ImmediateRegister(imm << 2, reg)

        class idef_RI16_rel(idef_RI16_abs):
            """RI16 variant whose immediate is a word offset from ``addr``."""

            def decode(self, opcode, addr):
                """Return ImmediateRegister(target, rt).

                ``target`` is ``(i16 << 2) + addr`` with bit 18 cleared.
                """
                imm, reg = idef_RI16.decode(self, opcode, addr)
                # Clearing bit 18 unconditionally equals the original
                # "clear it if set" test.
                target = ((imm << 2) + addr) & ~0x40000
                return ImmediateRegister(target, reg)

            def get_text(self, opcode, addr):
                """Return the token list ``name [rt, ]target``."""
                target, reg = self.decode(opcode, addr)

                mnemonic = InstructionTextToken(TextToken,
                                                '{:10s}'.format(self.name))
                tokens = [mnemonic]
                if not self.noRA:
                    tokens += [
                        InstructionTextToken(RegisterToken, reg),
                        Spu._comma_separator,
                    ]

                tokens.append(
                    InstructionTextToken(PossibleAddressToken,
                                         '{:#x}'.format(target), target))
                return tokens

        class idef_RI18(idef):
            """Instruction definition with an 18-bit immediate and one register."""

            def decode(self, opcode, addr):
                """Return ImmediateRegister(i18, rt); the immediate is not sign-extended."""
                _op, imm, t_idx = decode_RI18(opcode)
                return ImmediateRegister(imm, registers[t_idx])

            def get_text(self, opcode, addr):
                """Return the tokens ``name rt, imm`` as a tuple."""
                imm, reg = self.decode(opcode, addr)

                mnemonic = InstructionTextToken(TextToken,
                                                '{:10s}'.format(self.name))
                reg_token = InstructionTextToken(RegisterToken, reg)
                imm_token = InstructionTextToken(PossibleAddressToken,
                                                 '{:#x}'.format(imm), imm)
                return (mnemonic, reg_token, Spu._comma_separator, imm_token)

        class idef_I16RO(idef):
            """Instruction definition carrying two address-like immediates.

            The first is built from the split ROH/ROL fields and is always
            relative to ``addr``. The second is the 16-bit field, relative to
            ``addr`` when ``rel`` is true and absolute otherwise.
            """

            def __init__(self, name, rel=False):
                self.rel = rel
                self.cf = 0
                self.name = name

            def decode(self, opcode, addr):
                """Return TwoImmediates(branch_instruction_addr, branch_target)."""
                _op, ro_high, target, ro_low = decode_I16RO(opcode)

                # Rejoin the split offset and sign-extend it from bit 9
                # before scaling to bytes and rebasing on addr.
                offset = (ro_high << 7) | ro_low
                if offset & 0x200:
                    offset -= 0x400
                branch_inst = (offset << 2) + addr

                if not self.rel:
                    target <<= 2
                else:
                    # The 16-bit field is a signed word offset from addr.
                    if target & 0x8000:
                        target -= 0x10000
                    target = addr + (target << 2)

                return TwoImmediates(branch_inst, target)

            def get_text(self, opcode, addr):
                """Return the tokens ``name brinst, brtarg`` as a tuple."""
                brinst, brtarg = self.decode(opcode, addr)

                mnemonic = InstructionTextToken(TextToken,
                                                '{:10s}'.format(self.name))
                inst_token = InstructionTextToken(PossibleAddressToken,
                                                  '{:#x}'.format(brinst),
                                                  brinst)
                target_token = InstructionTextToken(PossibleAddressToken,
                                                    '{:#x}'.format(brtarg),
                                                    brtarg)
                return (mnemonic, inst_token, Spu._comma_separator,
                        target_token)

        class idef_stop(idef):
            """Instruction definition decoded through ``decode_STOP``."""

            def decode(self, opcode, addr):
                """Return the second field produced by decode_STOP."""
                _op, stop_code = decode_STOP(opcode)
                return stop_code

            def get_text(self, opcode, addr):
                # NOTE(review): no tokens are produced at all, not even the
                # mnemonic -- confirm that an empty rendering is intended.
                return ()

        # End idef classes

        # RI10-form instruction definitions, keyed by the opcode value that
        # decode_RI10() reports as its first field.
        # Entries use the idef_RI10 default (signed immediate) unless noted;
        # stqd/lqd use idef_RI10_ls, which scales the immediate by 16.
        itable_RI10 = {
            0x04: idef_RI10('ori'),
            0x05: idef_RI10('orhi'),
            0x06: idef_RI10('orbi'),
            0x0c: idef_RI10('sfi'),
            0x0d: idef_RI10('sfhi'),
            0x14: idef_RI10('andi'),
            0x15: idef_RI10('andhi'),
            0x16: idef_RI10('andbi'),
            0x1c: idef_RI10('ai'),
            0x1d: idef_RI10('ahi'),
            0x24: idef_RI10_ls('stqd'),
            0x34: idef_RI10_ls('lqd'),
            0x44: idef_RI10('xori'),
            0x45: idef_RI10('xorhi'),
            # NOTE(review): xorbi is the only entry built with signed=False;
            # orbi and andbi use the signed default -- confirm intent.
            0x46: idef_RI10('xorbi', signed=False),
            0x4c: idef_RI10('cgti'),
            0x4d: idef_RI10('cgthi'),
            0x4e: idef_RI10('cgtbi'),
            0x4f: idef_RI10('hgti'),  # false target
            0x5c: idef_RI10('clgti'),
            0x5d: idef_RI10('clgthi'),
            0x5e: idef_RI10('clgtbi'),
            0x5f: idef_RI10('hlgti'),  # false target
            0x74: idef_RI10('mpyi'),
            0x75: idef_RI10('mpyui'),
            0x7c: idef_RI10('ceqi'),
            0x7d: idef_RI10('ceqhi'),
            0x7e: idef_RI10('ceqbi'),
            0x7f: idef_RI10('heqi'),
        }

        # 11-bit opcodes (bits 0:10)
        # RR-form instruction definitions, keyed by the first field of
        # decode_RR(). This table is consulted before all the others, so an
        # opcode listed here shadows the same key in any later table.
        itable_RR = {
            0x000: idef_stop('stop'),
            0x001: idef_noops('lnop'),  # no regs
            0x002: idef_noops('sync', cbit=True),  # C/#C
            0x003: idef_noops('dsync'),  # no regs
            0x00c: idef_SPR('mfspr'),  # SA = number
            0x00d: idef_CH('rdch'),  # //, CA, RT
            0x00f: idef_CH('rchcnt'),  # //, CA, RT
            0x040: idef_RR('sf'),
            0x041: idef_RR('or'),
            0x042: idef_RR('bg'),
            0x048: idef_RR('sfh'),
            0x049: idef_RR('nor'),
            0x053: idef_RR('absdb'),
            0x058: idef_RR('rot'),
            0x059: idef_RR('rotm'),
            0x05a: idef_RR('rotma'),
            0x05b: idef_RR('shl'),
            0x05c: idef_RR('roth'),
            0x05d: idef_RR('rothm'),
            0x05e: idef_RR('rotmah'),
            0x05f: idef_RR('shlh'),
            0x0c0: idef_RR('a'),
            0x0c1: idef_RR('and'),
            0x0c2: idef_RR('cg'),
            0x0c8: idef_RR('ah'),
            0x0c9: idef_RR('nand'),
            0x0d3: idef_RR('avgb'),
            0x10c: idef_SPR('mtspr', swap=True),  # SA = number
            0x10d: idef_CH('wrch', swap=True),  # // CA RT
            0x128: idef_Branch('biz'),  # branch
            0x129: idef_Branch('binz'),  # branch
            0x12a: idef_Branch('bihz'),  # branch
            0x12b: idef_Branch('bihnz'),  # branch
            0x140: idef_RR('stopd'),
            0x144: idef_RR('stqx'),
            0x1a8: idef_Branch('bi', no2=True, uncond=True),  # branch
            0x1a9: idef_Branch('bisl'),  # branch
            0x1aa: idef_Branch('iret', no2=True, uncond=True),  # branch
            0x1ab: idef_Branch('bisled'),  # branch
            0x1ac: idef_ROHROL('hbr'),  # ROH/ROL form
            0x1b0: idef_R('gb'),  # no first reg
            0x1b1: idef_R('gbh'),  # no first reg
            0x1b2: idef_R('gbb'),  # no first reg
            0x1b4: idef_R('fsm'),  # no first reg
            0x1b5: idef_R('fsmh'),  # no first reg
            0x1b6: idef_R('fsmb'),  # no first reg
            0x1b8: idef_R('frest'),  # no first reg
            0x1b9: idef_R('frsqest'),  # no first reg
            0x1c4: idef_RR('lqx'),
            0x1cc: idef_RR('rotqbybi'),
            0x1cd: idef_RR('rotqmbybi'),
            0x1cf: idef_RR('shlqbybi'),
            0x1d4: idef_RR('cbx'),
            0x1d5: idef_RR('chx'),
            0x1d6: idef_RR('cwx'),
            0x1d7: idef_RR('cdx'),
            0x1d8: idef_RR('rotqbi'),
            0x1d9: idef_RR('rotqmbi'),
            0x1db: idef_RR('shlqbi'),
            0x1dc: idef_RR('rotqby'),
            0x1dd: idef_RR('rotqmby'),
            0x1df: idef_RR('shlqby'),
            0x1f0: idef_R('orx'),  # no first reg
            0x201: idef_noops('nop'),  # no regs
            0x240: idef_RR('cgt'),
            0x241: idef_RR('xor'),
            0x248: idef_RR('cgth'),
            0x249: idef_RR('eqv'),
            0x250: idef_RR('cgtb'),
            0x253: idef_RR('sumb'),
            0x258: idef_RR('hgt'),
            0x2a5: idef_R('clz'),  # no first reg
            0x2a6: idef_R('xswd'),  # no first reg
            0x2ae: idef_R('xshw'),  # no first
            0x2b4: idef_R('cntb'),  # no first reg
            0x2b6: idef_R('xsbh'),  # no first reg
            0x2c0: idef_RR('clgt'),
            0x2c1: idef_RR('andc'),
            0x2c2: idef_RR('fcgt'),
            0x2c3: idef_RR('dfcgt'),
            0x2c4: idef_RR('fa'),
            0x2c5: idef_RR('fs'),
            0x2c6: idef_RR('fm'),
            0x2c8: idef_RR('clgth'),
            0x2c9: idef_RR('orc'),
            0x2ca: idef_RR('fcmgt'),
            0x2cb: idef_RR('dfcmgt'),
            0x2cc: idef_RR('dfa'),
            0x2cd: idef_RR('dfs'),
            0x2ce: idef_RR('dfm'),
            0x2d0: idef_RR('clgtb'),
            0x2d8: idef_RR('hlgt'),  # false target
            0x340: idef_RR('addx'),
            0x341: idef_RR('sfx'),
            0x342: idef_RR('cgx'),
            0x343: idef_RR('bgx'),
            0x346: idef_RR('mpyhha'),
            0x34e: idef_RR('mpyhhau'),
            0x35c: idef_RR('dfma'),
            0x35d: idef_RR('dfms'),
            0x35e: idef_RR('dfnms'),
            0x35f: idef_RR('dfnma'),
            0x398: idef_R('fscrrd', noRA=True),  # no first and second
            0x3b8: idef_R('fesd'),  # no first
            0x3b9: idef_R('frds'),  # no first
            0x3ba: idef_R('fscrwr'),  # no first, rt is false target
            0x3c0: idef_RR('ceq'),
            0x3c2: idef_RR('fceq'),
            0x3c3: idef_RR('dfceq'),
            0x3c4: idef_RR('mpy'),
            0x3c5: idef_RR('mpyh'),
            0x3c6: idef_RR('mpyhh'),
            0x3c7: idef_RR('mpys'),
            0x3c8: idef_RR('ceqh'),
            0x3ca: idef_RR('fcmeq'),
            0x3cb: idef_RR('dfcmeq'),
            0x3cc: idef_RR('mpyu'),
            0x3ce: idef_RR('mpyhhu'),
            0x3d0: idef_RR('ceqb'),
            0x3d4: idef_RR('fi'),
            0x3d8: idef_RR('heq'),  # rt is false target
        }

        # 4-bit opcodes (bits 0:3)
        itable_RRR = {
            0x8: idef_RRR('selb'),
            0xb: idef_RRR('shufb'),
            0xc: idef_RRR('mpya'),
            0xd: idef_RRR('fnms'),
            0xe: idef_RRR('fma'),
            0xf: idef_RRR('fms'),
        }

        # RI16-form instruction definitions, keyed by the first field of
        # decode_RI16(). The _rel variants rebase the immediate on the
        # instruction address; the _abs variants only scale it by 4.
        itable_RI16 = {
            0x040: idef_RI16_rel('brz'),
            0x041: idef_RI16_abs('stqa', isBranch=False),
            0x042: idef_RI16_rel('brnz'),
            0x044: idef_RI16_rel('brhz'),
            0x046: idef_RI16_rel('brhnz'),
            0x047: idef_RI16_rel('stqr', isBranch=False),
            0x060: idef_RI16_abs('bra', noRA=True),
            0x061: idef_RI16_abs('lqa', isBranch=False),
            0x062: idef_RI16_abs('brasl'),
            0x064: idef_RI16_rel('br', noRA=True),
            0x065: idef_RI16('fsmbi'),
            0x066: idef_RI16_rel('brsl'),
            0x067: idef_RI16_rel('lqr', isBranch=False),
            0x081: idef_RI16('il', signext=True),
            0x082: idef_RI16('ilhu'),
            0x083: idef_RI16('ilh'),
            0x0c1: idef_RI16('iohl'),
        }

        # RI7-form instruction definitions, keyed by the first field of
        # decode_RI7().
        itable_RI7 = {
            0x078: idef_RI7('roti'),
            0x079: idef_RI7('rotmi'),
            0x07a: idef_RI7('rotmai'),
            0x07b: idef_RI7('shli'),
            0x07c: idef_RI7('rothi'),
            0x07d: idef_RI7('rothmi'),
            0x07e: idef_RI7('rotmahi'),
            # shlhi takes an immediate shift count like its siblings above.
            # It used to sit in itable_RR, where the immediate field was
            # decoded and displayed as a register.
            0x07f: idef_RI7('shlhi'),
            0x1f4: idef_RI7_ls('cbd'),
            0x1f5: idef_RI7_ls('chd'),
            0x1f6: idef_RI7_ls('cwd'),
            0x1f7: idef_RI7_ls('cdd'),
            0x1f8: idef_RI7('rotqbii'),
            0x1f9: idef_RI7('rotqmbii'),
            0x1fb: idef_RI7('shlqbii'),
            0x1fc: idef_RI7('rotqbyi'),
            0x1fd: idef_RI7('rotqmbyi'),
            0x1ff: idef_RI7('shlqbyi'),
            0x3bf: idef_RI7('dftsv', signed=False),
        }

        # Instruction definitions keyed by the first field of decode_RI18().
        # Besides the RI18-form `ila`, this table also holds the two
        # idef_I16RO hint-for-branch entries, which are looked up through the
        # same key.
        itable_RI18 = {
            0x21: idef_RI18('ila'),
            0x08: idef_I16RO('hbra'),  # roh/rol
            0x09: idef_I16RO('hbrr', rel=True),  # roh/rol
        }

        # 10-bit opcodes (bits 0:9)
        # RI8-form instruction definitions, keyed by the first field of
        # decode_RI8(). The second constructor argument is the bias from
        # which idef_RI8.decode subtracts the raw 8-bit field.
        itable_RI8 = {
            0x1d8: idef_RI8('cflts', 173),
            0x1d9: idef_RI8('cfltu', 173),
            0x1da: idef_RI8('csflt', 155),
            0x1db: idef_RI8('cuflt', 155),
        }

        # Fill self.itable for every possible value of the top 11 opcode
        # bits. Each value is shifted into position (bits 21..31 of a 32-bit
        # word) and run through every format decoder; the first table with a
        # matching opcode wins, in the order RR, RRR, RI7, RI8, RI10, RI16,
        # RI18. Values with no match leave self.itable[i] untouched.
        # range() is used instead of xrange() so this also runs on Python 3.
        for i in range(2048):
            opcode = i << 21
            RR = decode_RR(opcode)
            RRR = decode_RRR(opcode)
            RI7 = decode_RI7(opcode)
            RI8 = decode_RI8(opcode)
            RI10 = decode_RI10(opcode)
            RI16 = decode_RI16(opcode)
            RI18 = decode_RI18(opcode)

            ins = (itable_RR.get(RR[0], None) or itable_RRR.get(RRR[0], None)
                   or itable_RI7.get(RI7[0], None)
                   or itable_RI8.get(RI8[0], None)
                   or itable_RI10.get(RI10[0], None)
                   or itable_RI16.get(RI16[0], None)
                   or itable_RI18.get(RI18[0], None))

            if ins:
                self.itable[i] = ins

    def retrieve_instruction(self, data):
        """Look up the instruction definition for the word at the start of data.

        The first ``self.address_size`` bytes of ``data`` are read as one
        big-endian 32-bit word, and its top 11 bits index ``self.itable``.

        Returns:
            ``(instruction, opcode)``. When ``data`` is too short to unpack,
            ``(None, None)`` is returned so callers can always tuple-unpack
            the result; previously a bare ``None`` made that unpacking raise
            TypeError.
        """
        try:
            opcode = struct.unpack('>I', data[:self.address_size])[0]
        except struct.error:
            return None, None

        return self.itable[IBITS(opcode, 0, 10)], opcode

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch information for the instruction at addr.

        Returns None when no instruction can be retrieved from ``data``
        (too few bytes, or no definition for the opcode).
        """
        retrieved = self.retrieve_instruction(data)
        # retrieve_instruction may signal failure with a bare None; check
        # before unpacking so short input cannot raise TypeError here.
        if retrieved is None:
            return None
        instruction, opcode = retrieved
        if not instruction:
            return None

        result = InstructionInfo()
        result.length = self.address_size

        inst_name = instruction.name
        if inst_name in ('bi', 'iret'):
            result.add_branch(FunctionReturn)
        elif inst_name in ('brsl', 'brasl'):
            branch_addr, _ = instruction.decode(opcode, addr)
            result.add_branch(CallDestination, branch_addr)
        elif inst_name in ('brz', 'brnz', 'brhz', 'brhnz'):
            branch_addr, _ = instruction.decode(opcode, addr)
            result.add_branch(TrueBranch, branch_addr)
            result.add_branch(FalseBranch, addr + self.address_size)
        elif inst_name in ('br', 'bra'):
            branch_addr, _ = instruction.decode(opcode, addr)
            result.add_branch(UnconditionalBranch, branch_addr)
        # NOTE(review): the register-indirect branches (bisl, biz, binz,
        # bihnz, bisled; bihz was not listed) get no branch information.
        # A branch here compared inst_name to a tuple with `==`, which is
        # never true, so it never ran. It is removed rather than enabled
        # because it passed a register name as the branch target, and the
        # right branch types for these instructions still need deciding.

        return result

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at addr.

        Returns None when no instruction can be retrieved from ``data``.
        """
        retrieved = self.retrieve_instruction(data)
        # retrieve_instruction may signal failure with a bare None; check
        # before unpacking so short input cannot raise TypeError here.
        if retrieved is None:
            return None
        instruction, opcode = retrieved
        if instruction is None:
            return None

        return instruction.get_text(opcode, addr), self.address_size

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Append the low-level IL for the instruction at addr to ``il``.

        The lifter is looked up in ``instruction_il`` by mnemonic; mnemonics
        without a lifter produce ``il.unimplemented()``. A lifter may return
        a single LowLevelILExpr, an iterable of expressions, or a falsy value
        (nothing is appended).

        Returns:
            The instruction length, or None when no instruction can be
            retrieved from ``data``.
        """
        retrieved = self.retrieve_instruction(data)
        # retrieve_instruction may signal failure with a bare None; check
        # before unpacking so short input cannot raise TypeError here.
        if retrieved is None:
            return None
        instruction, opcode = retrieved
        if instruction is None:
            return None

        il_func = instruction_il.get(instruction.name,
                                     lambda *x: il.unimplemented())
        decoded = instruction.decode(opcode, addr)
        lifted = il_func(il, addr, decoded)
        if isinstance(lifted, LowLevelILExpr):
            il.append(lifted)
        elif lifted:
            for llil in lifted:
                il.append(llil)

        return self.address_size
Esempio n. 8
0
class MSP430(Architecture):
    """Binary Ninja architecture definition for the MSP430.

    Registers, flags and addresses are all 2 bytes wide. Instructions are one
    little-endian 16-bit word, optionally followed by one or two 16-bit
    operand words.
    """

    name = 'msp430'
    address_size = 2
    default_int_size = 2

    regs = {
        'pc': RegisterInfo('pc', 2),
        'sp': RegisterInfo('sp', 2),
        'sr': RegisterInfo('sr', 2),
        'cg': RegisterInfo('cg', 2),
        'r4': RegisterInfo('r4', 2),
        'r5': RegisterInfo('r5', 2),
        'r6': RegisterInfo('r6', 2),
        'r7': RegisterInfo('r7', 2),
        'r8': RegisterInfo('r8', 2),
        'r9': RegisterInfo('r9', 2),
        'r10': RegisterInfo('r10', 2),
        'r11': RegisterInfo('r11', 2),
        'r12': RegisterInfo('r12', 2),
        'r13': RegisterInfo('r13', 2),
        'r14': RegisterInfo('r14', 2),
        'r15': RegisterInfo('r15', 2),
    }

    flags = ['v', 'n', 'c', 'z']

    # The first flag write type is ignored currently.
    # See: https://github.com/Vector35/binaryninja-api/issues/513
    flag_write_types = ['', '*', 'cnv', 'cnz']

    flags_written_by_flag_write_type = {
        '*': ['v', 'n', 'c', 'z'],
        'cnv': ['v', 'n', 'c'],
        'cnz': ['c', 'n', 'z']
    }
    flag_roles = {
        'c': FlagRole.CarryFlagRole,
        'n': FlagRole.NegativeSignFlagRole,
        'z': FlagRole.ZeroFlagRole,
        'v': FlagRole.OverflowFlagRole
    }

    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_UGE: ['c'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_SGE: ['n', 'v'],
        LowLevelILFlagCondition.LLFC_SLT: ['n', 'v'],
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        LowLevelILFlagCondition.LLFC_NEG: ['n'],
        LowLevelILFlagCondition.LLFC_POS: ['n']
    }

    stack_pointer = 'sp'

    def decode_instruction(self, data, addr):
        """Decode the instruction at the start of ``data``.

        Args:
            data: raw bytes starting at the instruction.
            addr: address of the instruction, used for jump targets and
                error messages.

        Returns:
            A 9-tuple ``(instr, width, src_operand, dst_operand, src, dst,
            length, src_value, dst_value)``. Every element is None when the
            data is too short or the opcode is not recognised.
        """
        error_value = (None, None, None, None, None, None, None, None, None)
        if len(data) < 2:
            return error_value

        instruction = struct.unpack('<H', data[0:2])[0]

        # emulated instructions
        if instruction == 0x4130:
            return 'ret', None, None, None, None, None, 2, None, None

        opcode = (instruction & 0xf000) >> 12

        mask = InstructionMask.get(opcode)
        shift = InstructionMaskShift.get(opcode)

        # Opcodes with a mask and shift select the mnemonic from a sub-table;
        # the others map to a mnemonic directly.
        if mask and shift:
            instr = InstructionNames[opcode][(instruction & mask) >> shift]
        else:
            instr = InstructionNames[opcode]

        if instr is None:
            log_error('[{:x}] Bad opcode: {:x}'.format(addr, opcode))
            return error_value

        is_jump = instr in TYPE3_INSTRUCTIONS

        # Bit 6 selects byte (1) or word (2) width; jumps have no width.
        if is_jump:
            width = None
        else:
            width = 1 if instruction & 0x40 else 2

        src, src_operand, dst, dst_operand = GetOperands(instr, instruction)

        operand_length = 0
        if src_operand is not None:
            operand_length = OperandLengths[src_operand]
        if dst_operand is not None:
            operand_length += OperandLengths[dst_operand]

        length = 2 + operand_length

        if len(data) < length:
            return error_value

        src_value, dst_value = None, None

        if is_jump:
            # The jump offset is a 10-bit signed word count. Once doubled
            # into a byte offset it is 11 bits wide with the sign in bit 10
            # (0x400). The former `& 0x600` test also matched bit 9 and so
            # turned forward jumps of 256 words or more into backward jumps.
            branch_target = (instruction & 0x3ff) << 1
            if branch_target & 0x400:
                branch_target -= 0x800

            src_value = addr + 2 + branch_target

        elif operand_length == 2:
            # One extension word: it belongs to whichever operand needs it.
            value = struct.unpack('<H', data[2:4])[0]
            if OperandLengths[src_operand]:
                src_value = value
            else:
                dst_value = value

        elif operand_length == 4:
            src_value, dst_value = struct.unpack('<HH', data[2:6])

        # A move into pc is displayed and analysed as a branch.
        if instr == 'mov' and dst == 'pc':
            instr = 'br'

        return instr, width, src_operand, dst_operand, src, dst, length, src_value, dst_value

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch information, or None if undecodable."""
        instr, _, _, _, _, _, length, src_value, _ = self.decode_instruction(
            data, addr)

        if instr is None:
            return None

        result = InstructionInfo()
        result.length = length

        # Add branches
        if instr in ['ret', 'reti']:
            result.add_branch(BranchType.FunctionReturn)
        elif instr in ['jmp', 'br'] and src_value is not None:
            result.add_branch(BranchType.UnconditionalBranch, src_value)
        elif instr in TYPE3_INSTRUCTIONS:
            result.add_branch(BranchType.TrueBranch, src_value)
            result.add_branch(BranchType.FalseBranch, addr + 2)
        elif instr == 'call' and src_value is not None:
            result.add_branch(BranchType.CallDestination, src_value)

        return result

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for display, or None if undecodable."""
        (instr, width, src_operand, dst_operand, src, dst, length, src_value,
         dst_value) = self.decode_instruction(data, addr)

        if instr is None:
            return None

        instruction_text = instr

        # Byte-width instructions carry a '.b' suffix.
        if width == 1:
            instruction_text += '.b'

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 '{:7s}'.format(instruction_text))
        ]

        if instr in TYPE1_INSTRUCTIONS:
            tokens += OperandTokens[src_operand](src, src_value)

            tokens += [
                InstructionTextToken(InstructionTextTokenType.TextToken, ',')
            ]

            tokens += OperandTokens[dst_operand](dst, dst_value)

        elif instr in TYPE2_INSTRUCTIONS or instr in TYPE3_INSTRUCTIONS:
            # Single-operand instructions and jumps show only the source.
            tokens += OperandTokens[src_operand](src, src_value)

        return tokens, length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Append the low-level IL for the instruction at addr to ``il``.

        Returns the instruction length, or None if undecodable. Mnemonics
        without a lifter are logged and lifted as ``il.unimplemented()``.
        """
        (instr, width, src_operand, dst_operand, src, dst, length, src_value,
         dst_value) = self.decode_instruction(data, addr)

        if instr is None:
            return None

        lifter = InstructionIL.get(instr)
        if lifter is None:
            log_error('[0x{:4x}]: {} not implemented'.format(addr, instr))
            il.append(il.unimplemented())
        else:
            il_instr = lifter(il, src_operand, dst_operand, src, dst, width,
                              src_value, dst_value)
            # A lifter may return one expression or a list; None is skipped.
            if isinstance(il_instr, list):
                for expr in il_instr:
                    if expr is not None:
                        il.append(expr)
            elif il_instr is not None:
                il.append(il_instr)

        return length
Esempio n. 9
0
class CLEM(Architecture):
    """Binary Ninja architecture definition named 'clem'.

    Instruction bytes are read through the module-level
    ``read_memory_value`` helper rather than from the ``data`` buffer that
    Binary Ninja supplies. No low-level IL is produced.
    """

    name = 'clem'
    # The class body used to assign address_size twice (4, then 3 further
    # down). Only the later value ever took effect, so it is declared once.
    address_size = 3
    default_int_size = 3

    regs = {
        'r0': RegisterInfo('r0', 4),
        'r1': RegisterInfo('r1', 4),
        'r2': RegisterInfo('r2', 4),
        'r3': RegisterInfo('r3', 4),
        'r4': RegisterInfo('r4', 4),
        'r5': RegisterInfo('r5', 4),
        'r6': RegisterInfo('r6', 4),
        'r7': RegisterInfo('r7', 4),
        'r8': RegisterInfo('r8', 4),
        'r9': RegisterInfo('r9', 4),
        'r10': RegisterInfo('r10', 4),
        'r11': RegisterInfo('r11', 4),
        'r12': RegisterInfo('r12', 4),
        'r13': RegisterInfo('r13', 4),
        'r14': RegisterInfo('r14', 4),
        'r15': RegisterInfo('r15', 4),
        'r16': RegisterInfo('r16', 4),
        'r17': RegisterInfo('r17', 4),
        'r18': RegisterInfo('r18', 4),
        'r19': RegisterInfo('r19', 4),
        'r20': RegisterInfo('r20', 4),
        'r21': RegisterInfo('r21', 4),
        'r22': RegisterInfo('r22', 4),
        'r23': RegisterInfo('r23', 4),
        'r24': RegisterInfo('r24', 4),
        'r25': RegisterInfo('r25', 4),
        'r26': RegisterInfo('r26', 4),
        'r27': RegisterInfo('r27', 4),
        'r28': RegisterInfo('r28', 4),
        'st': RegisterInfo('st', 4),
        'ra': RegisterInfo('ra', 4),
        'pc': RegisterInfo('pc', 4),
    }

    flags = ['s', 'o', 'c', 'z']

    # The first flag write type is ignored currently.
    # See: https://github.com/Vector35/binaryninja-api/issues/513
    flag_write_types = ['', '*']

    flags_written_by_flag_write_type = {
        '*': ['s', 'o', 'c', 'z'],
    }
    flag_roles = {
        's': FlagRole.NegativeSignFlagRole,
        'o': FlagRole.OverflowFlagRole,
        'c': FlagRole.CarryFlagRole,
        'z': FlagRole.ZeroFlagRole,
    }

    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_UGE: ['c', 'z'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_SGE: ['s', 'o', 'z'],
        LowLevelILFlagCondition.LLFC_SLT: ['s', 'o'],
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        LowLevelILFlagCondition.LLFC_NEG: ['s'],
        LowLevelILFlagCondition.LLFC_POS: ['s']
    }

    stack_pointer = 'st'
    link_reg = 'ra'

    def find_instruction(self, addr):
        """Try every known instruction type at ``addr`` and return the match.

        Returns:
            The decoded instruction, or None when nothing matches. When
            several types match, the one named "LA" is preferred.

        Raises:
            RuntimeError: several types match and none of them is "LA".
        """
        found = []
        # Cache the memory read per instruction size; many types share one.
        bytes_per_size = {}
        for name, (inst_type, values) in Instructions.items():
            size = inst_type.SIZE
            if size not in bytes_per_size:
                bytes_per_size[size] = read_memory_value(addr, size)

            # If we weren't able to get the memory (we're past the end of
            # the unpacked bytes)
            if bytes_per_size[size] is None:
                continue

            inst = inst_type.decode(inst_type, name, values, addr,
                                    bytes_per_size[size])
            if inst is not None:
                found.append(inst)

        if not found:
            return None
        if len(found) == 1:
            return found[0]

        for inst in found:
            if inst.name == "LA":
                return inst
        raise RuntimeError("Multiple instructions found {}".format(
            [x.__class__.__name__ for x in found]))

    def decode_instruction(self, data, addr):
        """Return the instruction at ``addr``, or None if it cannot be decoded.

        ``data`` is only used for a minimum-length check (4 bytes); the
        instruction bytes themselves come from ``find_instruction``.
        """
        if len(data) < 4:
            return None

        instr = self.find_instruction(addr)
        if instr is None:
            log_error('[{:x}] Bad opcode'.format(addr))
            return None
        return instr

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch information, or None if undecodable."""
        instr = self.decode_instruction(data, addr)
        if instr is None:
            return None

        result = InstructionInfo()
        result.length = instr.SIZE
        instr.add_branches(result)

        return result

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for display, or None if undecodable."""
        instr = self.decode_instruction(data, addr)
        if instr is None:
            return None

        instruction_text = instr.get_name()
        # A trailing '.' marks the flag-setting form of an instruction.
        if instr.conditional_sets_flags():
            instruction_text += '.'

        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 '{:7s}'.format(instruction_text))
        ]
        operand_tokens = instr.get_operand_tokens()
        if instr.add_commas:
            # Put a separator before every operand except the first.
            for index, operand in enumerate(operand_tokens):
                if index:
                    tokens.append(
                        InstructionTextToken(
                            InstructionTextTokenType.OperandSeparatorToken,
                            ","))
                tokens.append(operand)
        else:
            tokens.extend(operand_tokens)

        return tokens, instr.SIZE

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; always returns None."""
        return None

    def perform_assemble(self, code, addr):
        """Assemble ``code``, patch it in at ``addr`` and rewrite the file.

        Returns:
            ``(placeholder, "")`` where ``placeholder`` is one "A" per
            assembled byte; its content is not the real encoding.
        """
        global FILE_BYTE_STREAM
        if FILE_BYTE_STREAM is None:
            make_file_contents()

        new_insts = asm.asm(code)
        num_bytes_changed = sum(len(x) for x in new_insts)

        # Update FILE_BYTE_STREAM
        for new_inst in new_insts:
            for new_byte in new_inst:
                FILE_BYTE_STREAM.bytes[addr] = (new_byte, 1)
                addr += 1

        # Rewrite the input file
        if num_bytes_changed != 0:
            rewrite_file()

        # Give binja something so it reloads the instructions
        return ("A" * num_bytes_changed, "")

    def perform_convert_to_nop(self, data, addr):
        """Overwrite the instruction at ``addr`` with a no-op equivalent."""
        # There's no NOP instruction, so do an AND with r0 without flag update
        bytes_changed, _error = self.perform_assemble("AN r0, r0, r0", addr)
        return bytes_changed
Esempio n. 10
0
class Brainfuck(Architecture):
    """Binary Ninja architecture for Brainfuck, one byte per instruction.

    Opcode bytes are translated to operation names through the module-level
    ``opcodes`` table. The data pointer is modelled as the 4-byte ``ptr``
    register and cells are single bytes of memory addressed through it.
    """
    name = "Brainfuck"

    address_size = 1
    default_int_size = 1
    max_instr_length = 1

    stack_pointer = 's'

    regs = {
        'tmp': RegisterInfo('tmp', 1),
        'ptr': RegisterInfo('ptr', 4),
        's': RegisterInfo('s', 1)
    }

    def parse_instruction(self, data, addr):
        """Return ``(opcode, length)`` for the instruction held in ``data``.

        ``data`` is converted with ``ord()`` when possible; anything ``ord()``
        rejects is passed through unchanged. The length is always 1.
        """
        try:
            ret_data = ord(data)
        except TypeError:
            # ord() refuses ints and sequences whose length is not one.
            ret_data = data
        return ret_data, 1

    def get_instruction_info(self, data, addr):
        """Describe length and branch behaviour of the instruction at ``addr``."""
        opcode, length = self.parse_instruction(data, addr)

        info = InstructionInfo()
        info.length = length

        # The matching bracket is not located here, so the far side of each
        # loop branch is reported as unresolved.
        op = opcodes[opcode]
        if op == 'Close':
            info.add_branch(BranchType.UnresolvedBranch)
            info.add_branch(BranchType.FalseBranch, addr + 1)
        elif op == 'Open':
            info.add_branch(BranchType.TrueBranch, addr + 1)
            info.add_branch(BranchType.UnresolvedBranch)

        return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` with the operation name as sole token."""
        opcode, length = self.parse_instruction(data, addr)

        tokens = []

        op = opcodes[opcode]

        tokens.append(
            InstructionTextToken(
                InstructionTextTokenType.InstructionToken,
                "{}".format(op)
            )
        )

        return tokens, length

    @staticmethod
    def _find_loop_target(view, start, step):
        """Locate the bracket that pairs with the one next to ``start``.

        Scans ``view`` byte by byte from ``start``: forwards (``step`` = 1)
        for the ``Close`` matching an ``Open``, or backwards (``step`` = -1)
        for the ``Open`` matching a ``Close``. Nested brackets are tracked
        with a depth counter.

        Returns the address immediately after the matching bracket, or
        ``None`` when the scan leaves the view without finding it.
        """
        if step > 0:
            same, partner = "Open", "Close"
        else:
            same, partner = "Close", "Open"

        br = BinaryReader(view)
        depth = 1
        pos = start
        # Check the bounds before every read so an unbalanced program ends
        # the scan instead of reading outside the view.
        while view.start <= pos < view.end:
            br.seek(pos)
            byte = br.read8()
            if byte is None:
                break
            instr = opcodes[byte]
            if instr == same:
                depth += 1
            elif instr == partner:
                depth -= 1
                if depth == 0:
                    return pos + 1
            pos += step

        return None

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction at ``addr`` into ``il``.

        Returns the instruction length, or ``None`` when a loop bracket has
        no matching partner inside the view.
        """
        opcode, length = self.parse_instruction(data, addr)

        op = opcodes[opcode]

        # Zero the data pointer when lifting the instruction at 0x10000.
        # NOTE(review): presumably the address the program is loaded at —
        # confirm against the BinaryView that maps the file.
        if addr == 0x10000:
            il.append(
                il.set_reg(4, 'ptr', il.const(1, 0))
            )

        if op == "Right":
            il.append(
                il.set_reg(4, 'ptr', il.add(
                    4, il.reg(4, 'ptr'), il.const(1, 1)), None)
            )
        elif op == "Left":
            il.append(
                il.set_reg(4, 'ptr', il.sub(
                    4, il.reg(4, 'ptr'), il.const(1, 1)), None)
            )
        elif op == "Add":
            il.append(
                il.store(1, il.reg(4, 'ptr'), il.add(
                    1, il.load(1, il.reg(4, 'ptr')), il.const(1, 1)), None)
            )
        elif op == "Subtract":
            il.append(
                il.store(1, il.reg(4, 'ptr'), il.sub(
                    1, il.load(1, il.reg(4, 'ptr')), il.const(1, 1)), None)
            )
        elif op == "In":
            il.append(
                il.unimplemented()
            )
        elif op == "Out":
            il.append(
                il.unimplemented()
            )
        elif op == "Open":
            # Non-zero cell: enter the loop body. Zero cell: skip past the
            # matching Close.
            true_label = il.get_label_for_address(
                Architecture['Brainfuck'], addr + 1)

            target = self._find_loop_target(
                il._source_function._view, addr + 1, 1)
            if target is None:
                print("Unfinished loop! This should never happen!")
                return None
            false_label = il.get_label_for_address(
                Architecture['Brainfuck'], target)

            il.append(
                il.if_expr(il.compare_not_equal(1, il.load(
                    1, il.reg(4, 'ptr')), il.const(1, 0)), true_label, false_label)
            )
        elif op == "Close":
            # Non-zero cell: jump back to just after the matching Open.
            # Zero cell: fall through to the next instruction.
            false_label = il.get_label_for_address(
                Architecture['Brainfuck'], addr + 1)

            # Start at the byte directly before this Close so that adjacent
            # brackets ("]]" or "[]") are taken into account.
            target = self._find_loop_target(
                il._source_function._view, addr - 1, -1)
            if target is None:
                print("Unfinished loop! This should never happen!")
                return None
            true_label = il.get_label_for_address(
                Architecture['Brainfuck'], target)

            il.append(
                il.if_expr(il.compare_not_equal(1, il.load(
                    1, il.reg(4, 'ptr')), il.const(1, 0)), true_label, false_label)
            )
        else:
            il.append(
                il.nop()
            )

        return length
Esempio n. 11
0
class Intel8086(Architecture):
    """Binary Ninja architecture definition for the Intel 8086.

    Decoding, rendering, lifting and re-encoding are all delegated to the
    ``mc`` module; this class supplies the register/flag tables and the
    callbacks that glue ``mc`` to Binary Ninja.
    """
    name = "8086"
    endianness = Endianness.LittleEndian

    default_int_size = 2
    address_size = 3

    stack_pointer = 'sp'
    regs = {
        # General purpose: each 16-bit register with its low/high byte halves
        'ax': RegisterInfo('ax', 2, 0),
        'al': RegisterInfo('ax', 1, 0),
        'ah': RegisterInfo('ax', 1, 1),
        'cx': RegisterInfo('cx', 2, 0),
        'cl': RegisterInfo('cx', 1, 0),
        'ch': RegisterInfo('cx', 1, 1),
        'bx': RegisterInfo('bx', 2, 0),
        'bl': RegisterInfo('bx', 1, 0),
        'bh': RegisterInfo('bx', 1, 1),
        'dx': RegisterInfo('dx', 2, 0),
        'dl': RegisterInfo('dx', 1, 0),
        'dh': RegisterInfo('dx', 1, 1),
        'sp': RegisterInfo('sp', 2),
        'bp': RegisterInfo('bp', 2),
        'si': RegisterInfo('si', 2),
        'di': RegisterInfo('di', 2),
        # Segment registers
        'cs': RegisterInfo('cs', 2),
        'ds': RegisterInfo('ds', 2),
        'es': RegisterInfo('es', 2),
        'ss': RegisterInfo('ss', 2),
        # Instruction pointer
        'ip': RegisterInfo('ip', 2)
    }
    flags = [
        # Status flags
        'c',  # carry
        'p',  # parity
        'a',  # aux carry
        'z',  # zero
        's',  # sign
        'o',  # overflow
        # Control flags
        'i',  # interrupt
        'd',  # direction
        't',  # trap
    ]
    # NOTE(review): the 8086 parity flag is set on even parity — confirm that
    # OddParityFlagRole is the intended role for 'p'.
    flag_roles = {
        'c': FlagRole.CarryFlagRole,
        'p': FlagRole.OddParityFlagRole,
        'a': FlagRole.HalfCarryFlagRole,
        'z': FlagRole.ZeroFlagRole,
        's': FlagRole.NegativeSignFlagRole,
        't': FlagRole.SpecialFlagRole,
        'i': FlagRole.SpecialFlagRole,
        'd': FlagRole.SpecialFlagRole,
        'o': FlagRole.OverflowFlagRole,
    }
    # Named groups of flags an instruction may write ('' writes none).
    flag_write_types = [
        '',
        '*',
        '!c',
        'co',
    ]
    flags_written_by_flag_write_type = {
        '*': ['c', 'p', 'a', 'z', 's', 'o'],
        '!c': ['p', 'a', 'z', 's', 'o'],
        'co': ['c', 'o'],
    }
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        LowLevelILFlagCondition.LLFC_SLT: ['s', 'o'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_SLE: ['z', 's', 'o'],
        LowLevelILFlagCondition.LLFC_ULE: ['c', 'z'],
        LowLevelILFlagCondition.LLFC_SGE: ['s', 'o'],
        LowLevelILFlagCondition.LLFC_UGE: ['c'],
        LowLevelILFlagCondition.LLFC_SGT: ['z', 's', 'o'],
        LowLevelILFlagCondition.LLFC_UGT: ['c', 'z'],
        LowLevelILFlagCondition.LLFC_NEG: ['s'],
        LowLevelILFlagCondition.LLFC_POS: ['s'],
        LowLevelILFlagCondition.LLFC_O: ['o'],
        LowLevelILFlagCondition.LLFC_NO: ['o'],
    }

    # Port I/O intrinsics, each declared with its input and output types.
    intrinsics = {
        'outb': IntrinsicInfo([Type.int(2), Type.int(1)], []),
        'outw': IntrinsicInfo([Type.int(2), Type.int(2)], []),
        'inb': IntrinsicInfo([Type.int(1)], [Type.int(2)]),
        'inw': IntrinsicInfo([Type.int(2)], [Type.int(2)]),
    }

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` for ``data``, or ``None`` if undecodable."""
        decoded = mc.decode(data, addr)
        if not decoded:
            return None

        info = InstructionInfo()
        decoded.analyze(info, addr)
        return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for ``data``, or ``None`` if undecodable.

        As a self-check the decoded instruction is re-encoded and compared
        with the original bytes; a mismatch is logged but does not prevent
        the instruction from being rendered.
        """
        decoded = mc.decode(data, addr)
        if not decoded:
            return None

        original_bytes = data[:decoded.total_length()]
        roundtrip_bytes = mc.encode(decoded, addr)
        if original_bytes != roundtrip_bytes:
            rendered = "".join(str(token) for token in decoded.render(addr))
            log_error("Instruction roundtrip error")
            log_error(rendered)
            log_error("Orig: {}".format(original_bytes.hex()))
            log_error("New:  {}".format(roundtrip_bytes.hex()))

        return decoded.render(addr), decoded.total_length()

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift ``data`` into ``il``; return its length, or ``None`` if undecodable."""
        decoded = mc.decode(data, addr)
        if not decoded:
            return None

        decoded.lift(il, addr)
        return decoded.total_length()

    def convert_to_nop(self, data, addr):
        """Return ``len(data)`` bytes of 0x90, the one-byte x86 NOP."""
        nop = b'\x90'
        return nop * len(data)

    def is_always_branch_patch_available(self, data, addr):
        """Report whether ``data`` decodes to a conditional jump.

        Returns ``None`` when ``data`` cannot be decoded.
        """
        decoded = mc.decode(data, addr)
        if not decoded:
            return None
        return isinstance(decoded, mc.instr.jmp.JmpCond)

    def always_branch(self, data, addr):
        """Re-encode the jump in ``data`` as its always-taken form."""
        unconditional = mc.decode(data, addr).to_always()
        return mc.encode(unconditional, addr)

    def is_invert_branch_patch_available(self, data, addr):
        """Report whether ``data`` decodes to a conditional jump.

        Returns ``None`` when ``data`` cannot be decoded.
        """
        decoded = mc.decode(data, addr)
        if not decoded:
            return None
        return isinstance(decoded, mc.instr.jmp.JmpCond)

    def invert_branch(self, data, addr):
        """Re-encode the jump in ``data`` with its condition inverted."""
        inverted = mc.decode(data, addr).to_inverted()
        return mc.encode(inverted, addr)
Esempio n. 12
0
class SuperH(Architecture):
    """Binary Ninja architecture definition for SuperH.

    Disassembly comes from ``disasm_single``; branch discovery and lifting
    are delegated to ``Brancher`` and ``Lifter``.
    """
    name = "superh"
    endianness = Endianness.LittleEndian
    address_size = 4
    default_int_size = 2
    max_instr_length = 4
    instr_alignment = 2

    # Every general, system and control register is RSIZE bytes wide.
    regs = {}

    for r in registers:
        regs[r] = RegisterInfo(r, RSIZE)

    for r in system_registers:
        regs[r] = RegisterInfo(r, RSIZE)

    for r in control_registers:
        regs[r] = RegisterInfo(r, RSIZE)

    flags = ['t']
    flag_roles = {'t': FlagRole.SpecialFlagRole}

    stack_pointer = 'R15'
    link_reg = 'PR'

    system_regs = system_registers + control_registers

    def __init__(self):
        super().__init__()

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` for the instruction at ``addr``.

        When the bytes cannot be disassembled the info carries only the
        default length ``ISIZE`` and no branches.
        """
        insn = disasm_single(data, addr)

        info = InstructionInfo()
        info.length = ISIZE
        if insn:
            info.length = insn.size
            Brancher.find_branches(insn, info)

        return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at ``addr``.

        Undecodable bytes are rendered as a single ``<unknown>`` text token
        with length ``ISIZE``.
        """
        insn = disasm_single(data, addr)

        if not insn:
            placeholder = InstructionTextToken(
                InstructionTextTokenType.TextToken, "<unknown>")
            return [placeholder], ISIZE

        rendered = [
            InstructionTextToken(kind, text) for kind, text in insn.tokens
        ]
        return rendered, insn.size

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction at ``addr`` and return its size.

        For undecodable bytes an ``unimplemented`` expression is appended
        and ``None`` is returned.
        """
        insn = disasm_single(data, addr)

        if insn:
            Lifter.lift(il, insn)
            return insn.size

        il.append(il.unimplemented())
        return None
Esempio n. 13
0
def get_regs():
    """Build the register table as a ``name -> RegisterInfo`` dict.

    The table holds the sixteen 4-byte registers ``a0``..``a15`` followed by
    the program counter and a handful of special registers.
    """
    # (name, size in bytes); sizes flagged with '?' round a sub-byte-aligned
    # bit width up to whole bytes.
    special_registers = (
        ('pc', 4),
        ('sar', 1),  # 6 bits?
        ('lbeg', 4),
        ('lend', 4),
        ('lcount', 4),
        ('acclo', 4),
        ('acchi', 4),
        ('m0', 4),
        ('m1', 4),
        ('m2', 4),
        ('m3', 4),
        ('br', 2),
        ('litbase', 3),  # 21 bits?
        ('scompare1', 4),
        ('ps', 2),  # 15 bits?
    )
    # Could do like ps.intlevel here too?
    # There are a bunch of other "Special registers" that we could implement here

    layout = [("a{}".format(index), 4) for index in range(16)]
    layout.extend(special_registers)

    return {name: RegisterInfo(name, size) for name, size in layout}
Esempio n. 14
0
class RenesasM16CArchitecture(Architecture):
    """Binary Ninja architecture definition for the Renesas M16C.

    Decoding, rendering, lifting and re-encoding are delegated to the ``mc``
    module; this class supplies the register/flag tables and the callbacks
    that glue ``mc`` to Binary Ninja.
    """
    name = "m16c"
    endianness = Endianness.LittleEndian

    default_int_size = 2
    address_size = 3

    stack_pointer = 'SP'
    regs = {
        # Data (banked in hardware)
        # The byte registers are declared as slices of the 32-bit pairs, like
        # the 16-bit halves, so that a write to e.g. R0L is seen as a partial
        # write to R0 and R2R0.
        'R2R0': RegisterInfo('R2R0', 4, 0),
        'R2': RegisterInfo('R2R0', 2, 2),
        'R0': RegisterInfo('R2R0', 2, 0),
        'R0H': RegisterInfo('R2R0', 1, 1),
        'R0L': RegisterInfo('R2R0', 1, 0),
        'R3R1': RegisterInfo('R3R1', 4, 0),
        'R3': RegisterInfo('R3R1', 2, 2),
        'R1': RegisterInfo('R3R1', 2, 0),
        'R1H': RegisterInfo('R3R1', 1, 1),
        'R1L': RegisterInfo('R3R1', 1, 0),
        # Address
        'A1A0': RegisterInfo('A1A0', 4, 0),
        'A1': RegisterInfo('A1A0', 2, 2),
        'A0': RegisterInfo('A1A0', 2, 0),
        # Frame base (banked in hardware)
        'FB': RegisterInfo('FB', 2, 0),
        # Program counter
        'PC': RegisterInfo('PC', 3, 0),
        # Stack pointer (banked in hardware as USP/ISP)
        'SP': RegisterInfo('SP', 2, 0),
        # Static base
        'SB': RegisterInfo('SB', 2, 0),
        # Interrupt base
        'INTB': RegisterInfo('INTB', 4, 0),
        'INTBH': RegisterInfo('INTB', 1, 2),
        'INTBL': RegisterInfo('INTB', 2, 0),
    }
    flags = [
        'C',  # Carry
        'D',  # Debug
        'Z',  # Zero
        'S',  # Sign
        'B',  # Register bank select
        'O',  # Overflow
        'I',  # Interrupt enable
        'U',  # Stack pointer select
        # IPL is not modelled
    ]
    flag_roles = {
        'C': FlagRole.CarryFlagRole,
        'D': FlagRole.SpecialFlagRole,
        'Z': FlagRole.ZeroFlagRole,
        'S': FlagRole.NegativeSignFlagRole,
        'B': FlagRole.SpecialFlagRole,
        'O': FlagRole.OverflowFlagRole,
        'I': FlagRole.SpecialFlagRole,
        'U': FlagRole.SpecialFlagRole,
    }
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_E: ['Z'],
        LowLevelILFlagCondition.LLFC_NE: ['Z'],
        LowLevelILFlagCondition.LLFC_POS: ['S'],
        LowLevelILFlagCondition.LLFC_NEG: ['S'],
        LowLevelILFlagCondition.LLFC_SGE: ['S', 'O'],
        LowLevelILFlagCondition.LLFC_SLT: ['S', 'O'],
        LowLevelILFlagCondition.LLFC_SGT: ['Z', 'S', 'O'],
        LowLevelILFlagCondition.LLFC_SLE: ['Z', 'S', 'O'],
        LowLevelILFlagCondition.LLFC_UGE: ['C'],
        LowLevelILFlagCondition.LLFC_ULT: ['C'],
        LowLevelILFlagCondition.LLFC_UGT: ['C', 'Z'],
        LowLevelILFlagCondition.LLFC_ULE: ['C', 'Z'],
        LowLevelILFlagCondition.LLFC_O: ['O'],
        LowLevelILFlagCondition.LLFC_NO: ['O'],
    }

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` for ``data``, or ``None`` if undecodable."""
        decoded = mc.decode(data, addr)
        if decoded:
            info = InstructionInfo()
            decoded.analyze(info, addr)
            return info
        return None

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for ``data``, or ``None`` if undecodable.

        As a self-check the decoded instruction is re-encoded and compared
        with the original bytes; a mismatch is logged but does not prevent
        the instruction from being rendered.
        """
        decoded = mc.decode(data, addr)
        if decoded:
            encoded = data[:decoded.length()]
            recoded = mc.encode(decoded, addr)
            if encoded != recoded:
                log.log_error("Instruction roundtrip error")
                log.log_error("".join(str(x) for x in decoded.render(addr)))
                log.log_error("Orig: {}".format(encoded.hex()))
                log.log_error("New:  {}".format(recoded.hex()))

            # The user setting is applied only after the round-trip check,
            # so it affects just the tokens that are returned.
            decoded.show_suffix = Settings().get_bool('arch.m16c.showSuffix')
            return decoded.render(addr), decoded.length()
        return None

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift ``data`` into ``il``; return its length, or ``None`` if undecodable."""
        decoded = mc.decode(data, addr)
        if decoded:
            decoded.lift(il, addr)
            return decoded.length()
        return None

    def convert_to_nop(self, data, addr):
        """Return ``len(data)`` bytes of 0x04.

        NOTE(review): 0x04 is presumably the one-byte M16C NOP encoding —
        confirm against the instruction set manual.
        """
        return b'\x04' * len(data)
Esempio n. 15
0
class RISCV(Architecture):
    """Binary Ninja architecture definition for 32-bit RISC-V.

    Decoding is delegated to ``RVDisassembler`` and lifting to ``Lifter``,
    both configured with this class's ``address_size``.
    """
    name = "riscv"

    address_size = 4
    default_int_size = 4
    max_instr_length = 4

    endianness = Endianness.LittleEndian

    disassembler = RVDisassembler(address_size)
    lifter = Lifter(address_size)

    # we are using the ABI names here, as those are also the register names
    # returned by capstone.
    regs = {
        # x0 - hard-wired zero
        "zero": RegisterInfo("zero", address_size),
        # x1 - return address (caller saved)
        "ra": RegisterInfo("ra", address_size),
        # x2 - stack pointer (callee saved)
        "sp": RegisterInfo("sp", address_size),
        # x3 - global pointer
        "gp": RegisterInfo("gp", address_size),
        # x4 - thread pointer
        "tp": RegisterInfo("tp", address_size),
        # x5-7 - temporaries (caller saved)
        "t0": RegisterInfo("t0", address_size),
        "t1": RegisterInfo("t1", address_size),
        "t2": RegisterInfo("t2", address_size),
        # x8 - saved register / frame pointer (callee saved)
        "s0": RegisterInfo("s0", address_size),
        # x9 - saved register (callee saved)
        "s1": RegisterInfo("s1", address_size),
        # x10-x11 - first function argument and return value (caller saved)
        "a0": RegisterInfo("a0", address_size),
        "a1": RegisterInfo("a1", address_size),
        # x12-17 - function arguments (caller saved)
        "a2": RegisterInfo("a2", address_size),
        "a3": RegisterInfo("a3", address_size),
        "a4": RegisterInfo("a4", address_size),
        "a5": RegisterInfo("a5", address_size),
        "a6": RegisterInfo("a6", address_size),
        "a7": RegisterInfo("a7", address_size),
        # x18-27 - saved registers (callee saved)
        "s2": RegisterInfo("s2", address_size),
        "s3": RegisterInfo("s3", address_size),
        "s4": RegisterInfo("s4", address_size),
        "s5": RegisterInfo("s5", address_size),
        "s6": RegisterInfo("s6", address_size),
        "s7": RegisterInfo("s7", address_size),
        "s8": RegisterInfo("s8", address_size),
        "s9": RegisterInfo("s9", address_size),
        "s10": RegisterInfo("s10", address_size),
        "s11": RegisterInfo("s11", address_size),
        # x28-31 - temporaries (caller saved)
        "t3": RegisterInfo("t3", address_size),
        "t4": RegisterInfo("t4", address_size),
        "t5": RegisterInfo("t5", address_size),
        "t6": RegisterInfo("t6", address_size),
        # pc
        "pc": RegisterInfo("pc", address_size),
    }

    stack_pointer = "sp"

    def get_instruction_info(self, data, addr):
        """Return length and branch information for the instruction at ``addr``.

        Returns ``None`` when the bytes cannot be decoded.
        """
        instr = self.disassembler.decode(data, addr)

        if instr is None:
            return None

        result = InstructionInfo()
        result.length = instr.size

        if instr.name == 'ret':
            result.add_branch(BranchType.FunctionReturn)
        elif instr.name in branch_ins:
            # Branch targets are PC-relative. The fall-through address is
            # derived from the decoded size rather than a fixed 4 so that it
            # always agrees with ``result.length``.
            result.add_branch(BranchType.TrueBranch, addr + instr.imm)
            result.add_branch(BranchType.FalseBranch, addr + instr.size)
        elif instr.name in direct_call_ins:
            result.add_branch(BranchType.CallDestination, addr + instr.imm)
        elif instr.name in indirect_call_ins:
            result.add_branch(BranchType.UnresolvedBranch)

        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, size)`` for the instruction, or ``None`` if undecodable."""
        instr = self.disassembler.decode(data, addr)

        if instr is None:
            return None

        tokens = gen_token(instr)

        return tokens, instr.size

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into ``il``; return its size, or ``None`` if undecodable."""
        instr = self.disassembler.decode(data, addr)

        if instr is None:
            return None
        self.lifter.lift(il, instr, instr.name)

        return instr.size
Esempio n. 16
0
class M6800(Architecture):
    '''M6800 Architecture class.

    Instructions are decoded through the module-level ``INSTRUCTIONS`` table
    and lifted through the ``LLIL_OPERATIONS`` dispatch table.
    '''
    name = 'M6800'
    address_size = 2
    default_int_size = 2

    regs = {
        'SP': RegisterInfo('SP', 2),  # Stack Pointer
        'PC': RegisterInfo('PC', 2),  # Program Counter
        'IX': RegisterInfo('IX', 2),  # Index Register
        'ACCA': RegisterInfo('ACCA', 1),  # Accumulator A
        'ACCB': RegisterInfo('ACCB', 1)  # Accumulator B
    }

    flags = ['C', 'V', 'Z', 'N', 'I', 'H']

    flag_roles = {
        'C': FlagRole.CarryFlagRole,
        'V': FlagRole.OverflowFlagRole,
        'Z': FlagRole.ZeroFlagRole,
        'N': FlagRole.NegativeSignFlagRole,
        'I': FlagRole.SpecialFlagRole,  # Interrupt Flag
        'H': FlagRole.HalfCarryFlagRole
    }

    flag_write_types = ['', 'HNZVC', 'NZVC', 'NZV', 'Z']

    flags_written_by_flag_write_type = {
        'HNZVC': ['H', 'N', 'Z', 'V', 'C'],
        'NZVC': ['N', 'Z', 'V', 'C'],
        'NZV': ['N', 'Z', 'V'],
        'Z': ['Z']
    }

    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_UGE: ['C'],
        LowLevelILFlagCondition.LLFC_UGT: ['C', 'Z'],
        LowLevelILFlagCondition.LLFC_ULE: ['C', 'Z'],
        LowLevelILFlagCondition.LLFC_ULT: ['C'],
        LowLevelILFlagCondition.LLFC_SGE: ['N', 'V'],
        LowLevelILFlagCondition.LLFC_SLT: ['N', 'V'],
        LowLevelILFlagCondition.LLFC_SGT: ['Z', 'N', 'V'],
        LowLevelILFlagCondition.LLFC_SLE: ['Z', 'N', 'V'],
        LowLevelILFlagCondition.LLFC_E: ['Z'],
        LowLevelILFlagCondition.LLFC_NE: ['Z'],
        LowLevelILFlagCondition.LLFC_NEG: ['N'],
        LowLevelILFlagCondition.LLFC_POS: ['N'],
        LowLevelILFlagCondition.LLFC_O: ['V'],
        LowLevelILFlagCondition.LLFC_NO: ['V']
    }

    stack_pointer = 'SP'

    # pylint: disable=invalid-name
    @staticmethod
    def _handle_jump(il: LowLevelILFunction, value):
        '''Build (without appending) a jump to the absolute address ``value``.

        Returns a ``goto`` when the function already has a label for the
        address, otherwise a ``jump`` to the constant address.
        '''
        label = il.get_label_for_address(Architecture['M6800'], value)

        return il.jump(il.const(2, value)) if label is None else il.goto(label)

    # pylint: disable=invalid-name
    @staticmethod
    def _handle_branch(il: LowLevelILFunction, nmemonic, inst_length, value):
        '''Append the IL for a conditional branch to ``value``.

        The condition is produced by the ``LLIL_OPERATIONS`` entry for
        ``nmemonic``. Labels that the function does not already know are
        created locally and marked here.
        '''
        true_label = il.get_label_for_address(Architecture['M6800'], value)

        if true_label is None:
            true_label = LowLevelILLabel()
            indirect = True
        else:
            indirect = False

        false_label_found = True

        false_label = il.get_label_for_address(
            Architecture['M6800'], il.current_address + inst_length)

        if false_label is None:
            false_label = LowLevelILLabel()
            false_label_found = False

        il.append(
            il.if_expr(LLIL_OPERATIONS[nmemonic](il, None, None), true_label,
                       false_label))

        # Without a known label for the target, the taken path is a local
        # label followed by an explicit jump to the address.
        if indirect:
            il.mark_label(true_label)
            il.append(il.jump(il.const(2, value)))

        if not false_label_found:
            il.mark_label(false_label)

    @staticmethod
    def _decode_instruction(data, addr):
        '''Decode the instruction at the start of ``data``.

        Returns ``(nmemonic, inst_length, inst_operand, inst_type, mode,
        value)``, where ``value`` is the operand taken from the instruction
        bytes (an absolute address for relative mode) or ``None`` for
        addressing modes that carry no operand bytes.

        Raises ``LookupError`` when the opcode is unknown or the operand
        bytes are missing.
        '''
        opcode = data[0]
        try:
            nmemonic, inst_length, inst_operand, inst_type, mode = INSTRUCTIONS[
                opcode]
        except KeyError as error:
            raise LookupError(
                f'Opcode 0x{opcode:X} at address 0x{addr:X} is invalid.'
            ) from error

        value = None

        # need to collect information based on each address mode
        # INHERENT addressing => value is None
        # ACCUMULATOR addressing => value is in accumulator
        try:
            if mode == AddressMode.RELATIVE:  # calculate absolute address here
                # should always be 2 bytes long, second byte is 2's complement
                value = addr + inst_length + int.from_bytes(
                    data[1:2], 'big', signed=True)
                # use address mask to set value to real space
                value &= ADDRESS_MASK
            elif mode == AddressMode.IMMEDIATE:
                if inst_length == 2:
                    value = data[1]
                else:
                    value = struct.unpack('>H', data[1:3])[0]
                    # use address mask to set value to real space
                    value &= ADDRESS_MASK
            elif mode == AddressMode.EXTENDED:
                value = struct.unpack('>H', data[1:3])[0]
                # use address mask to set value to real space
                value &= ADDRESS_MASK
            elif mode in [AddressMode.INDEXED, AddressMode.DIRECT]:
                value = data[1]
        except (struct.error, IndexError) as error:
            # Both exceptions mean the operand bytes are not all present.
            raise LookupError(
                f'Unable to decode instruction at address 0x{addr:X}'
            ) from error

        return nmemonic, inst_length, inst_operand, inst_type, mode, value

    def get_instruction_text(self, data, addr):
        '''Return ``(tokens, length)`` for the instruction at ``addr``.

        Decoding failures are logged and reported by returning ``None``.
        '''
        try:
            (nmemonic, inst_length, inst_operand, _, mode,
             value) = M6800._decode_instruction(data, addr)
        except LookupError as error:
            log_error(str(error))
            return None

        tokens = [InstructionTextToken(ITTT.InstructionToken, nmemonic)]

        if mode == AddressMode.ACCUMULATOR:
            tokens.append(InstructionTextToken(ITTT.OperandSeparatorToken,
                                               ' '))
            tokens.append(
                InstructionTextToken(ITTT.RegisterToken, inst_operand))
        elif mode in [
                AddressMode.DIRECT, AddressMode.EXTENDED, AddressMode.RELATIVE
        ]:
            tokens.append(InstructionTextToken(ITTT.OperandSeparatorToken,
                                               ' '))
            tokens.append(
                InstructionTextToken(ITTT.PossibleAddressToken, f'0x{value:X}',
                                     value))
        elif mode == AddressMode.IMMEDIATE:
            if inst_operand in ['ACCA', 'ACCB']:
                tokens.append(
                    InstructionTextToken(ITTT.OperandSeparatorToken, ' '))
                tokens.append(
                    InstructionTextToken(ITTT.RegisterToken, inst_operand))
            tokens.append(InstructionTextToken(ITTT.OperandSeparatorToken,
                                               ' '))
            tokens.append(
                InstructionTextToken(ITTT.IntegerToken, f'0x{value:X}', value))
        elif mode == AddressMode.INDEXED:
            if inst_operand in ['ACCA', 'ACCB']:
                tokens.append(
                    InstructionTextToken(ITTT.OperandSeparatorToken, ' '))
                tokens.append(
                    InstructionTextToken(ITTT.RegisterToken, inst_operand))
            tokens.append(InstructionTextToken(ITTT.OperandSeparatorToken,
                                               ' '))
            # Indexed operands are rendered as "[IX + 0xNN]".
            tokens.append(
                InstructionTextToken(ITTT.BeginMemoryOperandToken, '['))
            tokens.append(InstructionTextToken(ITTT.RegisterToken, 'IX'))
            tokens.append(
                InstructionTextToken(ITTT.OperandSeparatorToken, ' + '))
            tokens.append(
                InstructionTextToken(ITTT.IntegerToken, f'0x{value:X}', value))
            tokens.append(InstructionTextToken(ITTT.EndMemoryOperandToken,
                                               ']'))

        return tokens, inst_length

    def get_instruction_info(self, data, addr):
        '''Return length and branch information for the instruction at ``addr``.

        Decoding failures are logged and reported by returning ``None``.
        Indexed-mode control transfers are reported as unresolved because
        their target depends on the run-time value of IX.
        '''
        try:
            (_, inst_length, _, inst_type, mode,
             value) = M6800._decode_instruction(data, addr)
        except LookupError as error:
            log_error(str(error))
            return None

        inst = InstructionInfo()
        inst.length = inst_length

        if inst_type == InstructionType.CONDITIONAL_BRANCH:
            if mode == AddressMode.INDEXED:
                inst.add_branch(BranchType.UnresolvedBranch)
            else:
                inst.add_branch(BranchType.TrueBranch, value)
                inst.add_branch(BranchType.FalseBranch, addr + inst_length)
        elif inst_type == InstructionType.UNCONDITIONAL_BRANCH:
            if mode == AddressMode.INDEXED:
                inst.add_branch(BranchType.UnresolvedBranch)
            else:
                inst.add_branch(BranchType.UnconditionalBranch, value)
        elif inst_type == InstructionType.CALL:
            if mode == AddressMode.INDEXED:
                inst.add_branch(BranchType.UnresolvedBranch)
            else:
                inst.add_branch(BranchType.CallDestination, value)
        elif inst_type == InstructionType.RETURN:
            inst.add_branch(BranchType.FunctionReturn)

        return inst

    def get_instruction_low_level_il(self, data, addr, il: LowLevelILFunction):
        '''Lift the instruction at ``addr`` into ``il`` and return its length.

        Decoding failures are logged and reported by returning ``None``.
        '''
        try:
            (nmemonic, inst_length, inst_operand, inst_type, mode,
             value) = M6800._decode_instruction(data, addr)
        except LookupError as error:
            log_error(str(error))
            return None

        # Figure out what the instruction uses
        load_size = 2 if nmemonic in BIGGER_LOADS else 1
        operand, second_operand = None, None

        # if this is a conditional branch, handle that separately
        if inst_type == InstructionType.CONDITIONAL_BRANCH:
            M6800._handle_branch(il, nmemonic, inst_length, value)
            return inst_length

        # if this is an unconditional branch, handle that separately
        if inst_type == InstructionType.UNCONDITIONAL_BRANCH:
            if mode == AddressMode.INDEXED:
                # value is only the 8-bit offset here; the target is
                # IX + offset and cannot be resolved statically.
                il.append(
                    il.jump(
                        il.add(2, il.reg(2, 'IX'), il.const(1, value))))
            else:
                # _handle_jump only builds the expression; it must be
                # appended to become part of the function.
                il.append(M6800._handle_jump(il, value))
            return inst_length

        if mode == AddressMode.ACCUMULATOR:
            # handle the case where we need the name, not the reg, for pop
            operand = inst_operand if nmemonic == 'PUL' else il.reg(
                1, inst_operand)
        elif mode == AddressMode.INDEXED:
            # set the destination variable for the memory store operations
            destination = il.add(2, il.reg(2, 'IX'), il.const(1, value))
            operand = il.load(load_size, destination)
        elif mode in [AddressMode.DIRECT, AddressMode.EXTENDED]:
            # set the destination variable for the memory store operations
            destination = il.const(inst_length - 1, value)
            operand = il.load(load_size, destination)
        elif mode == AddressMode.IMMEDIATE:
            operand = il.const(inst_length - 1, value)
        elif mode == AddressMode.RELATIVE:
            # we have already calculated the absolute address
            # set the destination variable for the memory store operations
            destination = il.const(2, value)
            operand = il.load(load_size, destination)

        # if we are dual mode, we have to handle things special
        if inst_type == InstructionType.DUAL:
            second_operand = inst_operand

        # calculate the base LLIL
        operation = LLIL_OPERATIONS[nmemonic](il, operand, second_operand)

        # if the instruction has different destinations, set them appropriately
        if nmemonic in REGISTER_OR_MEMORY_DESTINATIONS:
            if mode == AddressMode.ACCUMULATOR:
                operation = il.set_reg(1, inst_operand, operation)
            else:
                operation = il.store(1, destination, operation)

        # Finally, calculate and append the instruction(s)
        il.append(operation)

        return inst_length
Esempio n. 17
0
class AVR(Architecture):
    """Architecture description for AVR: registers, flags and decoding."""

    name = 'AVR'
    address_size = 2
    default_int_size = 2
    max_instr_length = 4

    # General purpose registers r0..r31, each one byte wide.
    regs = {
        reg_name: RegisterInfo(reg_name, 1)
        for reg_name in map('r{}'.format, range(32))
    }

    # NOTE(review): 'SP' is not a key of ``regs`` above -- confirm that the
    # stack pointer register is declared the way the framework expects.
    stack_pointer = 'SP'

    flags = ['C', 'Z', 'N', 'V', 'S', 'H', 'T', 'I']

    flag_write_types = [
        '',
        '*',
        'onlyT',
        'svnz',
        'onlyC',
        'onlyH',
        'onlyI',
        'onlyN',
        'onlyS',
        'onlyV',
        'onlyZ',
        'svnzc',
        'hsvnzc',
        'zc',
    ]

    # Which flags each flag write type updates.
    flags_written_by_flag_write_type = {
        '*': ['C', 'Z', 'N', 'V', 'S', 'H', 'T', 'I'],
        'onlyT': ['T'],
        'svnz': ['S', 'V', 'N', 'Z'],
        'onlyC': ['C'],
        'onlyH': ['H'],
        'onlyI': ['I'],
        'onlyN': ['N'],
        'onlyS': ['S'],
        'onlyV': ['V'],
        'onlyZ': ['Z'],
        'svnzc': ['S', 'V', 'N', 'Z', 'C'],
        'hsvnzc': ['H', 'S', 'V', 'N', 'Z', 'C'],
        'zc': ['Z', 'C'],
    }

    flag_roles = {
        'C': FlagRole.CarryFlagRole,
        'Z': FlagRole.ZeroFlagRole,
        'N': FlagRole.NegativeSignFlagRole,
        'V': FlagRole.OverflowFlagRole,
        'S': FlagRole.SpecialFlagRole,  # TODO: pick a more specific role
        'H': FlagRole.SpecialFlagRole,  # TODO: pick a more specific role
        'T': FlagRole.SpecialFlagRole,  # TODO: pick a more specific role
        'I': FlagRole.SpecialFlagRole,  # TODO: pick a more specific role
    }

    # Draft of the flag-condition table, not enabled yet:
    #
    # flags_required_for_flag_condition = {
    #     LLFC_E : ['Z'],    # equal
    #     LLFC_NE : ['Z'],   # not equal
    #     LLFC_SLT : ['N'],  # signed less than
    #     LLFC_ULT : [''],   # unsigned less than
    #     LLFC_SLE : ['N'],  # signed less than or equal
    #     LLFC_ULE : [''],   # unsigned less than or equal
    #     LLFC_SGE : ['N'],  # signed greater than or equal
    #     LLFC_UGE : [''],   # unsigned greater than or equal
    #     LLFC_SGT : ['N'],  # signed greater than
    #     LLFC_UGT : ['C'],  # unsigned greater than
    #     LLFC_NEG : ['N'],  # negative
    #     LLFC_POS : ['N'],  # positive
    #     LLFC_O : ['V'],    # overflow
    #     LLFC_NO : ['V']    # no overflow
    # }

    def decode_instruction(self, data, addr):
        error_value = (None, None, None, None, None, None, None, None, None)
        if len(data) < 2:
            return error_value

        instruction = struct.unpack('<H', data[0:2])[0]

        #print("Current Instruction is " + str(hex(instruction)))

        if instruction == 0x95C8:
            return 'lpm', None, None, None, None, None, 2, None, None
        elif instruction == 0x95D8:
            return 'elpm', None, None, None, None, None, 2, None, None
        elif instruction == 0x0000:
            return 'nop', None, None, None, None, None, 2, None, None
        elif instruction == 0x9508:
            return 'ret', None, None, None, None, None, 2, None, None
        elif instruction == 0x9518:
            return 'reti', None, None, None, None, None, 2, None, None
        elif instruction == 0x9408:
            return 'sec', None, None, None, None, None, 2, None, None
        elif instruction == 0x9458:
            return 'seh', None, None, None, None, None, 2, None, None
        elif instruction == 0x9478:
            return 'sei', None, None, None, None, None, 2, None, None
        elif instruction == 0x9428:
            return 'sen', None, None, None, None, None, 2, None, None
        elif instruction == 0x9448:
            return 'ses', None, None, None, None, None, 2, None, None
        elif instruction == 0x9468:
            return 'set', None, None, None, None, None, 2, None, None
        elif instruction == 0x9438:
            return 'sev', None, None, None, None, None, 2, None, None
        elif instruction == 0x9418:
            return 'sez', None, None, None, None, None, 2, None, None
        elif instruction == 0x9588:
            return 'sleep', None, None, None, None, None, 2, None, None
        elif instruction == 0x95E8:
            return 'spm', None, None, None, None, None, 2, None, None
        elif instruction == 0x95F8:  #TODO
            return 'spm z+', None, None, None, None, None, 2, None, None
        elif instruction == 0x95A8:
            return 'wdr', None, None, None, None, None, 2, None, None
        elif instruction == 0x9598:
            return 'break', None, None, None, None, None, 2, None, None
        elif instruction == 0x9488:
            return 'clc', None, None, None, None, None, 2, None, None
        elif instruction == 0x94D8:
            return 'clh', None, None, None, None, None, 2, None, None
        elif instruction == 0x94F8:
            return 'cli', None, None, None, None, None, 2, None, None
        elif instruction == 0x94A8:
            return 'cln', None, None, None, None, None, 2, None, None
        elif instruction == 0x94C8:
            return 'cls', None, None, None, None, None, 2, None, None
        elif instruction == 0x94E8:
            return 'clt', None, None, None, None, None, 2, None, None
        elif instruction == 0x94B8:
            return 'clv', None, None, None, None, None, 2, None, None
        elif instruction == 0x9498:
            return 'clz', None, None, None, None, None, 2, None, None
        elif instruction == 0x9519:
            return 'eicall', None, None, None, None, None, 2, None, None
        elif instruction == 0x9419:
            return 'eijmp', None, None, None, None, None, 2, None, None
        elif instruction == 0x9509:
            return 'icall', None, None, None, None, None, 2, None, None
        elif instruction == 0x9409:
            return 'ijmp', None, None, None, None, None, 2, None, None

        #High byte most significant nibble
        high_msn = (instruction & 0xf000) >> 12
        #print("The high byte most significant nibble is : " + str(high_msn))

        instr = get_instr_name(instruction, high_msn)

        if instr is None:
            log_error('Bad opcode: {:x}'.format(instruction))
            return error_value

        if instr == 'sts' or instr == 'lds' or instr == 'call' or instr == 'jmp':
            width = 2
        else:
            width = None

        src, src_operand_type, dst, dst_operand_type = GetOperands(
            instr, instruction)

        if width != None:
            length = 2 + width
        else:
            length = 2

        if length == 4:
            direct_addr = struct.unpack('<H', data[2:4])[0]
            if instr == 'sts':
                dst = direct_addr
            elif instr == 'lds':
                src = direct_addr
            elif instr == 'call':
                dst = direct_addr
            elif instr == 'jmp':
                dst = direct_addr

        src_value, dst_value = None, None

        return instr, width, src_operand_type, dst_operand_type, src, dst, length, src_value, dst_value

    def perform_get_instruction_info(self, data, addr):
        instr, _, _, _, _, dst, length, src_value, _ = self.decode_instruction(
            data, addr)

        if instr is None:
            return None

        result = InstructionInfo()
        result.length = length

        if instr == 'ret':
            result.add_branch(BranchType.FunctionReturn)
        elif instr == 'reti':
            result.add_branch(BranchType.FunctionReturn)
        elif instr == 'call':
            result.add_branch(BranchType.CallDestination, dst * 2)
        elif instr == 'rcall':
            result.add_branch(BranchType.CallDestination,
                              addr + dst * 2 + 1 * 2)
        elif instr == 'jmp':
            result.add_branch(BranchType.UnconditionalBranch, dst * 2)
        elif instr == 'rjmp':
            result.add_branch(BranchType.UnconditionalBranch,
                              addr + dst * 2 + 1 * 2)
        elif (instr == 'breq' or instr == 'brne' or instr == 'brcs'
              or instr == 'brcc' or instr == 'brsh' or instr == 'brlo'
              or instr == 'brmi' or instr == 'brpl' or instr == 'brge'
              or instr == 'brlt' or instr == 'brhs' or instr == 'brhc'
              or instr == 'brts' or instr == 'brtc' or instr == 'brvs'
              or instr == 'brvc' or instr == 'brie' or instr == 'brid'):
            result.add_branch(BranchType.TrueBranch, addr + dst * 2 + 1 * 2)
            result.add_branch(BranchType.FalseBranch, addr + 1 * 2)
        elif (instr == 'brbs' or instr == 'brbc'):
            result.add_branch(BranchType.TrueBranch, addr + dst * 2 + 1 * 2)
            result.add_branch(BranchType.FalseBranch, addr + 1 * 2)
        elif (instr == 'cpse' or instr == 'sbrc' or instr == 'sbrs'
              or instr == 'sbic' or instr == 'sbis'):
            result.add_branch(BranchType.TrueBranch, addr + 2 * 2)
            result.add_branch(BranchType.FalseBranch, addr + 1 * 2)
        elif (instr == 'icall' or instr == 'ijmp'):
            result.add_branch(BranchType.IndirectBranch)

        #TODO

        return result

    def perform_get_instruction_text(self, data, addr):
        instr, width, src_operand_type, dst_operand_type, src, dst, length, src_value, dst_value = self.decode_instruction(
            data, addr)

        if instr is None:
            return None

        tokens = []

        instruction_text = instr

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 '{:7s}'.format(instruction_text))
        ]

        if dst_operand_type != None:
            tokens += OperandTokenGen[dst_operand_type](dst, addr, instr)
        #
        if dst_operand_type != None and src_operand_type != None:
            tokens += [
                InstructionTextToken(InstructionTextTokenType.TextToken, ',')
            ]
        #
        if src_operand_type != None:
            tokens += OperandTokenGen[src_operand_type](src, addr, instr)

        return tokens, length

    #TODO
    def perform_get_instruction_low_level_il(self, data, addr, il):
        instr, width, src_operand_type, dst_operand_type, src, dst, length, src_value, dst_value = self.decode_instruction(
            data, addr)

        if instr is None:
            return None

        # if InstructionIL.get(instr) is None:
        #     log_error('[0x{:4x}]: {} not implemented'.format(addr, instr))
        #     il.append(il.unimplemented())
        #
        #
        return length

    def perform_get_flag_write_low_level_il(self, op, size, write_type, flag,
                                            operands, il):
        """Placeholder: no flag-write IL is produced; always returns None."""
        return None

    def perform_get_flag_condition_low_level_il(self, cond, il):
        """Placeholder: no flag-condition IL is produced; returns None."""
        return None
    def write_register(self, reg_name: str, value: int):
        """Write *value* to *reg_name* in the emulator's register store.

        The store is the list kept under
        ``session_data["emulator.registers"]`` whose entries are
        ``(name, value)`` pairs. A full-width register has its entry replaced
        directly. A sub-register is merged into its enclosing full-width
        register according to the sub-register's ``extend`` policy:

        * ``NoExtend`` -- only the sub-register's bits change.
        * ``ZeroExtendToFullWidth`` -- bits above the sub-register are
          cleared; bits below it are preserved.
        * ``SignExtendToFullWidth`` -- as zero extension, then the bits above
          the sub-register are filled with its top bit.

        Each store update is bracketed by ``startUpdate``/``endUpdate`` on the
        registers model (run on the main thread); writing the stack pointer
        also updates the stack model.
        """
        registers = self.view.session_data.get("emulator.registers", [])
        if not registers:
            self.view.session_data["emulator.registers"] = registers

        # name -> (index in the list, current value)
        regs = {
            r[0]: (i, r[1])
            for i, r in enumerate(
                self.view.session_data.get("emulator.registers", []))
        }

        if reg_name.startswith('temp'):
            register = RegisterInfo(reg_name, self.view.address_size)
        else:
            register = self.view.arch.regs[reg_name]

        size = register.size
        offset = register.offset
        extend = register.extend
        full_width_reg = register.full_width_reg

        if full_width_reg == reg_name:
            # NOTE(review): temp registers, and any write while the store is
            # empty, are directed at slot 0 of the list -- confirm that this
            # is intended rather than appending a new entry.
            if not regs or reg_name.startswith('temp'):
                regs[reg_name] = (0, None)
            execute_on_main_thread_and_wait(
                self.view.session_data["emulator.registers.model"].startUpdate)
            registers[regs[reg_name][0]] = (reg_name, value)
            execute_on_main_thread_and_wait(
                self.view.session_data["emulator.registers.model"].endUpdate)

            if reg_name == self.view.arch.stack_pointer:
                execute_on_main_thread_and_wait(lambda: self.view.session_data[
                    'emulator.stack.model'].update(value))
            return

        full_width_value = self.read_register(full_width_reg)
        full_width_size = self.view.arch.regs[full_width_reg].size

        # Bit masks, all relative to the full-width register:
        #   below_field   - bits under the sub-register
        #   through_field - bits from bit 0 up to the sub-register's top
        #   field_mask    - exactly the sub-register's bits
        all_bits = (1 << (full_width_size * 8)) - 1
        below_field = (1 << (offset * 8)) - 1
        through_field = (1 << ((size + offset) * 8)) - 1
        field_mask = through_field ^ below_field

        masked_value = (value << (offset * 8)) & field_mask

        if extend == ImplicitRegisterExtend.NoExtend:
            full_width_value = masked_value | (full_width_value &
                                               (all_bits ^ field_mask))

        elif extend == ImplicitRegisterExtend.ZeroExtendToFullWidth:
            # Keep only the bits below the field; the stale contents of the
            # field itself must not be OR-ed back into the new value.
            full_width_value = masked_value | (full_width_value & below_field)

        elif extend == ImplicitRegisterExtend.SignExtendToFullWidth:
            full_width_value = masked_value | (full_width_value & below_field)
            # The sign is the most significant bit of the sub-register.
            sign_bit = masked_value & (1 << ((size + offset) * 8 - 1))
            if sign_bit:
                # Fill only the bits above the field.
                full_width_value |= all_bits ^ through_field

        # NOTE(review): with an empty store this records the register *name*
        # as the list index used below -- confirm the intended behaviour.
        if not regs:
            regs[full_width_reg] = (full_width_reg, full_width_value)

        execute_on_main_thread_and_wait(
            self.view.session_data["emulator.registers.model"].startUpdate)
        registers[regs[full_width_reg][0]] = (full_width_reg, full_width_value)
        execute_on_main_thread_and_wait(
            self.view.session_data["emulator.registers.model"].endUpdate)
Esempio n. 19
0
class VMArch(Architecture):
    """Architecture for a small VM with fixed three-byte instructions.

    Every instruction is ``opcode, offset, value``. Mnemonics come from the
    module-level ``opcodes`` table; ``set``, ``get``, ``xor`` and ``hlt``
    are lifted.
    """

    name = "VMArch"

    address_size = 1
    default_int_size = 1
    max_instr_length = 3

    stack_pointer = 's'

    regs = {
        'k': RegisterInfo('k', 1),
        'c': RegisterInfo('c', 1),
        's': RegisterInfo('s', 1)
    }

    def parse_instruction(self, data, addr):
        """Split the first three bytes of *data* into its fields.

        Returns ``(opcode, offset, value, 3)``. Raises ``ValueError`` when
        *data* holds fewer than three bytes.
        """
        opcode, offset, value = data[:3]

        return opcode, offset, value, 3

    def _decode(self, data, addr):
        """Return ``(op, opcode, offset, value, length)`` or ``None``.

        ``None`` means the bytes cannot be decoded: either fewer than three
        bytes are available or the opcode has no entry in ``opcodes``.
        """
        if len(data) < 3:
            return None

        opcode, offset, value, length = self.parse_instruction(data, addr)

        try:
            op = opcodes[opcode]
        except (KeyError, IndexError):
            return None

        return op, opcode, offset, value, length

    def get_instruction_info(self, data, addr):
        """Return the instruction's length and branches, or ``None``."""
        decoded = self._decode(data, addr)
        if decoded is None:
            return None
        op, _opcode, _offset, _value, length = decoded

        info = InstructionInfo()
        info.length = length

        if op == 'hlt':
            info.add_branch(BranchType.FunctionReturn)

        return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or ``None``."""
        decoded = self._decode(data, addr)
        if decoded is None:
            return None
        op, opcode, offset, value, length = decoded

        tokens = []

        # create the opcode token
        tokens.append(
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 f'{op:<.6s}',
                                 value=opcode))

        # create the offset token
        if op != 'hlt':
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    f'  {offset}',
                    value=offset,
                    size=1))

        if op == 'set':
            tokens.append(
                InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                     f'  {value}',
                                     value=value,
                                     size=1))

        return tokens, length

    def get_instruction_low_level_il(self, data, addr, il):
        """Append the instruction's IL to *il*; return its length or ``None``."""
        decoded = self._decode(data, addr)
        if decoded is None:
            return None
        op, _opcode, offset, value, length = decoded

        # [offset].b = value
        if op == 'set':
            il.append(il.store(1, il.const(1, offset), il.const(1, value)))

        # c = [offset].b
        elif op == 'get':
            il.append(il.set_reg(1, 'c', il.load(1, il.const(1, offset))))

        # [offset].b = [offset].b ^ c   (k is used as the scratch register)
        elif op == 'xor':
            il.append(il.set_reg(1, 'k', il.load(1, il.const(1, offset))))
            il.append(
                il.store(1, il.const(1, offset),
                         il.xor_expr(1, il.reg(1, 'k'), il.reg(1, 'c'))))
        elif op == 'hlt':
            il.append(il.no_ret())

        return length
Esempio n. 20
0
class VMNDH(Architecture):
    """Architecture description for the 'vmndh-2k12' virtual machine."""

    name = 'vmndh-2k12'
    address_size = 2
    default_int_size = 2
    max_instr_length = 5

    # All registers are two bytes wide.
    regs = {
        reg_name: RegisterInfo(reg_name, 2)
        for reg_name in ('r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7',
                         'sp', 'bp', 'pc')
    }

    flags = ['a', 'b', 'z']

    # The first flag write type is ignored currently.
    # See: https://github.com/Vector35/binaryninja-api/issues/513
    flag_write_types = ['', '*', 'a', 'b', 'z']

    flags_written_by_flag_write_type = {
        '*': ['a', 'b', 'z'],
        'z': ['z'],
    }

    flag_roles = {
        'a': FlagRole.CarryFlagRole,
        'b': FlagRole.NegativeSignFlagRole,
        'z': FlagRole.ZeroFlagRole,
        # 'v': FlagRole.OverflowFlagRole
    }

    # Flags consulted by each flag condition.
    # NOTE(review): the original author was unsure about this table; the
    # commented-out entries refer to flags ('c', 'n') this VM does not
    # declare -- confirm the mapping before relying on it.
    flags_required_for_flag_condition = {
        # LowLevelILFlagCondition.LLFC_UGE: ['c'],
        # LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_SGT: ['a'],
        LowLevelILFlagCondition.LLFC_SLT: ['b'],
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        # LowLevelILFlagCondition.LLFC_NEG: ['n'],
        # LowLevelILFlagCondition.LLFC_POS: ['n']
    }

    stack_pointer = 'sp'

    def is_never_branch_patch_available(self, data, addr):
        """Return True if the first byte of *data* is a patchable branch opcode.

        The accepted opcodes are 0x10, 0x11, 0x16, 0x1b, 0x1e and 0x1f.
        Empty *data* yields False instead of raising from ``ord``.
        """
        opcode = data[0:1]
        return bool(opcode) and ord(opcode) in (0x10, 0x11, 0x16, 0x1b,
                                                0x1e, 0x1f)

    def is_invert_branch_patch_available(self, data, addr):
        """Return True if the first byte of *data* is an invertible branch.

        The accepted opcodes are 0x10, 0x11, 0x1e and 0x1f. Empty *data*
        yields False instead of raising from ``ord``.
        """
        opcode = data[0:1]
        return bool(opcode) and ord(opcode) in (0x10, 0x11, 0x1e, 0x1f)

    def is_always_branch_patch_available(self, data, addr):
        """Return True if the branch at *data* can be made unconditional.

        The accepted opcodes are 0x10, 0x11, 0x1e and 0x1f. Empty *data*
        yields False instead of raising from ``ord``.
        """
        opcode = data[0:1]
        return bool(opcode) and ord(opcode) in (0x10, 0x11, 0x1e, 0x1f)

    def is_skip_and_return_zero_patch_available(self, data, addr):
        """Return True if *data* is a four-byte instruction with opcode 0x19.

        The opcode is compared as bytes: comparing a bytes slice with a
        ``str`` literal is always unequal, which made this check always
        report False.
        """
        return (data[0:1] == b"\x19") and (len(data) == 4)

    def is_skip_and_return_value_patch_available(self, data, addr):
        """Return True if *data* is a four-byte instruction with opcode 0x19.

        The opcode is compared as bytes: comparing a bytes slice with a
        ``str`` literal is always unequal, which made this check always
        report False.
        """
        return (data[0:1] == b"\x19") and (len(data) == 4)

    def convert_to_nop(self, data, addr):
        """Return ``len(data)`` bytes of 0x02, the filler used for NOPs."""
        nop_count = len(data)
        return bytes([0x02] * nop_count)

    def never_branch(self, data, addr):
        return self.convert_to_nop(data, addr)

    def always_branch(self, data, addr):
        """Rewrite a conditional branch so that it uses opcode 0x1b.

        Returns the patched bytes, or ``None`` when the first byte is not
        one of 0x10, 0x11, 0x1e, 0x1f. Empty *data* also returns ``None``
        instead of raising from ``ord``.
        """
        opcode = data[0:1]
        if not opcode or ord(opcode) not in (0x10, 0x11, 0x1e, 0x1f):
            return None
        return b"\x1b" + data[1:]

    def invert_branch(self, data, addr):
        """Flip the low bit of a conditional branch opcode.

        Returns the patched bytes, or ``None`` when the first byte is not
        one of 0x10, 0x11, 0x1e, 0x1f. Empty *data* also returns ``None``
        instead of raising from ``ord``.
        """
        opcode = data[0:1]
        if not opcode or ord(opcode) not in (0x10, 0x11, 0x1e, 0x1f):
            return None
        return bytes([ord(opcode) ^ 0x01]) + data[1:]

    def skip_and_return_value(self, data, addr, value):
        """Replace a four-byte opcode-0x19 instruction with a constant load.

        Returns the four replacement bytes ``04, OP_FLAG_REG_DIRECT08, 00,
        value & 0xff``, or ``None`` when *data* is not a four-byte
        instruction starting with 0x19.

        The opcode is compared as bytes: comparing a bytes slice with a
        ``str`` literal is always unequal, which made this method always
        return ``None``.
        """
        if (data[0:1] != b"\x19") or (len(data) != 4):
            return None
        return bytes([0x04, OP_FLAG_REG_DIRECT08, 0x00, value & 0xff])

    def assemble(self, code, addr):
        """Assemble one instruction from *code* (bytes) and return its bytes.

        *code* is decoded with the charmap codec, any ``.b`` suffix is
        dropped, and the text is split into a mnemonic and up to two
        operands. An operand is an integer literal, a register name from
        ``register_indexes``, or either of those wrapped in ``[...]`` for
        register-indirect access.

        For branch instructions, a destination of 0x100 or more is turned
        into an offset relative to *addr* (less 2 for ``jns``, 4 for
        ``call`` and 3 otherwise).

        Raises:
            ValueError: unknown mnemonic, or operands the instruction does
                not accept.
            KeyError: an operand is neither an integer nor a known register.
        """
        code = code.decode('charmap')
        if ".b" in code:
            code = code.replace(".b", "")
        code = list(filter(None, code.replace(", ", " ").split(" ")))
        mnemonic = code[0]
        if mnemonic not in mnemonics:
            raise ValueError("Invalid mnemonic {}".format(code))
        assembly = mnemonics[mnemonic]
        cls = instruction_dict[ord(assembly)]
        if cls.__base__ == Instruction:
            # Operand-less instruction: the opcode byte is the encoding.
            return assembly.encode('charmap')

        if FlagInstruction in cls.__mro__:
            valid_flags = cls.valid_flags
        else:
            valid_flags = [cls.flag]

        dst_flag = None
        dst = code[1]
        if dst[0] == '[' and dst[-1] == ']':
            if OP_FLAG_REGINDIRECT_REG not in valid_flags:
                raise ValueError("Invalid destination operand {}".format(dst))
            dst = dst[1:-1]
            dst_flag = OP_FLAG_REGINDIRECT_REG

        # python2 0x8723L
        if dst[-1:] == 'L':
            dst = dst[:-1]

        try:
            dst_value = int(dst, 0) & 0xffff
        except ValueError:
            # Not an integer literal, so it has to be a register name.
            dst = register_indexes[dst]
            flag = OP_FLAG_REG
        else:
            if dst_value < 0x100:
                flag = OP_FLAG_DIRECT08
                dst = chr(dst_value)
            else:
                if BranchInstruction in cls.__mro__:
                    dst_value -= addr
                    if mnemonic == 'jns':
                        dst_value -= 2
                    elif mnemonic == 'call':
                        dst_value -= 4
                    else:
                        dst_value -= 3
                    dst_value &= 0xffff
                flag = OP_FLAG_DIRECT16
                dst = struct.pack("<H", dst_value).decode('charmap')

        if len(code) == 2:
            if flag not in valid_flags:
                raise ValueError("Invalid destination operand {}".format(dst))
            if len(valid_flags) > 1:
                assembly += chr(flag)
            assembly += dst
            return assembly.encode('charmap')

        src_flag = None
        src = code[2]
        if src[0] == '[' and src[-1] == ']':
            if OP_FLAG_REG_REGINDIRECT not in valid_flags:
                raise ValueError("Invalid source operand {}".format(src))
            src = src[1:-1]
            # Used as a marker for "source is register-indirect"; it is
            # mapped to the final flag below.
            src_flag = OP_FLAG_REGINDIRECT_REG

        if flag != OP_FLAG_REG:
            raise ValueError("Invalid destination register: {}".format(dst))

        flag = None

        if not src_flag:
            try:
                src_value = int(src, 0) & 0xffff
            except ValueError:
                # Not an integer literal, so it has to be a register name.
                src = register_indexes[src]
                src_flag = OP_FLAG_REG
            else:
                if src_value < 0x100:
                    src_flag = OP_FLAG_DIRECT08
                    src = chr(src_value)
                else:
                    src_flag = OP_FLAG_DIRECT16
                    src = struct.pack("<H", src_value).decode('charmap')

        if dst_flag:
            if src_flag == dst_flag:
                flag = OP_FLAG_REGINDIRECT_REGINDIRECT
            elif src_flag == OP_FLAG_DIRECT08:
                flag = OP_FLAG_REGINDIRECT_DIRECT08
            elif src_flag == OP_FLAG_DIRECT16:
                flag = OP_FLAG_REGINDIRECT_DIRECT16
            elif src_flag == OP_FLAG_REG:
                flag = OP_FLAG_REGINDIRECT_REG
            else:
                raise ValueError("src_flag is bugged: {:x}".format(src_flag))
        else:
            if src_flag == OP_FLAG_REGINDIRECT_REG:
                flag = OP_FLAG_REG_REGINDIRECT
            elif src_flag == OP_FLAG_DIRECT08:
                flag = OP_FLAG_REG_DIRECT08
            elif src_flag == OP_FLAG_DIRECT16:
                flag = OP_FLAG_REG_DIRECT16
            elif src_flag == OP_FLAG_REG:
                flag = OP_FLAG_REG_REG
            else:
                raise ValueError("src_flag is bugged: {:x}".format(src_flag))

        if flag not in valid_flags:
            raise ValueError(
                "Invalid operands for operation: {}".format(mnemonic))

        if len(valid_flags) > 1:
            assembly += chr(flag)
        assembly += dst + src
        return assembly.encode('charmap')

    def decode_instruction(self, data, addr):
        """Decode the instruction at *addr* into an instruction object.

        Returns ``None`` when *addr* is below 0x8000, when *data* is empty,
        when the opcode is not in ``instruction_dict``, or when the
        instruction class raises ``VMNDHError`` (the last two are logged).
        """
        if addr < 0x8000:
            return None
        # Without this guard ord() raises TypeError on an empty buffer.
        if not data:
            return None

        opcode = ord(data[:1])
        if opcode not in instruction_dict:
            log_error('0x{:x} : Bad opcode: {:x}'.format(addr, opcode))
            return None

        try:
            return instruction_dict[opcode](data, addr)
        except VMNDHError as e:
            log_error('0x{:x} : Bad instruction: {:s}'.format(addr, str(e)))
            return None

    def get_instruction_info(self, data, addr):
        instr_obj = self.decode_instruction(data, addr)

        if not instr_obj:
            return None

        result = InstructionInfo()
        result.length = instr_obj.length

        instr_name = instr_obj.getName()

        # TODO: update this properly
        # Add branches
        if instr_name in ['ret', 'end']:
            result.add_branch(BranchType.FunctionReturn)
        elif instr_name.startswith('jmp'):
            result.add_branch(BranchType.UnconditionalBranch,
                              instr_obj.dst_value)
        elif instr_name in BRANCH_INSTRUCTIONS:
            result.add_branch(BranchType.TrueBranch, instr_obj.dst_value)
            result.add_branch(BranchType.FalseBranch, addr + instr_obj.length)
        elif instr_name == 'call':
            result.add_branch(BranchType.CallDestination, instr_obj.dst_value)
        elif instr_name == 'syscall':
            result.add_branch(BranchType.SystemCall)

        return result

    def get_instruction_text(self, data, addr):
        instr_obj = self.decode_instruction(data, addr)

        if not instr_obj:
            return None

        tokens = []

        instruction_text = instr_obj.getName()

        if hasattr(instr_obj, "flag") and flag_word_size[instr_obj.flag] == 1:
            instruction_text += '.b'

        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 '{:7s}'.format(instruction_text))
        ]

        tokens += instr_obj.getTextToken()

        return tokens, instr_obj.length

    def get_instruction_low_level_il(self, data, addr, il):
        instr_obj = self.decode_instruction(data, addr)

        if not instr_obj:
            return None

        insns = instr_obj.do_il(data, addr, il)
        if isinstance(insns, list):
            [il.append(i) for i in insns]
        elif insns is not None:
            try:
                il.append(insns)
            except:
                traceback.print_exc()
                print(type(insns), insns, instr_obj.getName(), hex(addr))
        return instr_obj.length
Esempio n. 21
0
class Synacor(Architecture):
    """Architecture for the Synacor VM.

    Instructions are sequences of little-endian 16-bit words: an opcode
    followed by the operands listed in the operation's ``operand_types``.
    ``size`` (module level) is the width of one word in bytes.
    """

    name = 'Synacor'

    address_size = size
    default_int_size = size
    instr_alignment = 1
    max_instr_length = max([op.size for op in operations])

    regs = {
        'R0': RegisterInfo('R0', size),
        'R1': RegisterInfo('R1', size),
        'R2': RegisterInfo('R2', size),
        'R3': RegisterInfo('R3', size),
        'R4': RegisterInfo('R4', size),
        'R5': RegisterInfo('R5', size),
        'R6': RegisterInfo('R6', size),
        'R7': RegisterInfo('R7', size),

        # Not sure if used, but required by Binary Ninja
        'sp': RegisterInfo('sp', size)
    }
    stack_pointer = 'sp'

    def assemble(self, code, _addr):
        """Assemble ``"<mnemonic or opcode> op1, op2, ..."`` into bytes.

        Raises ``ValueError`` when the operation is unknown or the operand
        count does not match the operation.
        """
        parts = re.split('[ ,]+', code.decode().strip())
        instr = parts.pop(0)
        op_cls = lookup.get(instr) or lookup.get(safeint(instr, 0))
        if op_cls is None:
            raise ValueError("No operation found for '%s'" % instr)

        types = op_cls.operand_types
        if len(parts) != len(types):
            raise ValueError("'%s' requires exactly %d operands" %
                             (op_cls.label, len(types)))

        values = [op_cls.opcode]
        for (i, optype) in enumerate(types):
            values.append(Operand.assemble(i, optype, parts[i]))
        return struct.pack('<%iH' % len(values), *values)

    def convert_to_nop(self, data, _addr):
        """Return one no-op word for every whole word in *data*."""
        nop = struct.pack('<1H', NoopOperation.opcode)
        return nop * (len(data) // size)

    def decode(self, data, count, offset=0):
        """Read *count* words from *data*, starting *offset* words in.

        Returns a tuple of integers, or ``[None] * count`` when *data* is
        too short to hold all requested words.
        """
        start = offset * size
        end = start + count * size
        # Compare against the absolute end: the words in front of `start`
        # must be present too, otherwise the slice below comes up short.
        if len(data) < end:
            return [None] * count
        return struct.unpack('<%iH' % count, data[start:end])

    def decode_operation(self, data, addr):
        """Decode the operation at *addr*; return ``None`` if not possible."""
        opcode, = self.decode(data, count=1)
        if opcode is None:
            return None

        op_cls = lookup.get(opcode)
        if op_cls is None:
            return None

        types = op_cls.operand_types
        values = self.decode(data, count=len(types), offset=1)
        # decode() signals truncated input with None placeholders.
        if values is None or any(value is None for value in values):
            return None

        operands = [
            Operand(i, optype, values[i]) for (i, optype) in enumerate(types)
        ]
        return op_cls(self, addr, operands)

    def get_instruction_info(self, data, addr):
        """Return length and branch information, or ``None``."""
        op = self.decode_operation(data, addr)
        if op is None:
            return None

        ii = InstructionInfo()
        ii.length = op.size
        op.branching(ii)
        return ii

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the operation, or ``None``."""
        op = self.decode_operation(data, addr)
        if op is None:
            return None

        tokens = []
        op.tokenize(tokens)
        return tokens, op.size

    def get_instruction_low_level_il(self, data, addr, il):
        """Let the operation emit its IL into *il*; return its length."""
        op = self.decode_operation(data, addr)
        if op is None:
            return None

        op.low_level_il(il)
        return op.size
Esempio n. 22
0
class EVM(Architecture):
    """Architecture for Ethereum Virtual Machine bytecode.

    Instructions are decoded with ``disassemble_one``; lifters are looked
    up by instruction name in ``insn_il``.
    """

    name = "EVM"

    # Actual size is 32 but we're going to truncate everything
    address_size = ADDR_SIZE

    # should be 32
    default_int_size = ADDR_SIZE

    instr_alignment = 1

    max_instr_length = 33

    endianness = Endianness.BigEndian

    regs = {
        "sp": RegisterInfo("sp", ADDR_SIZE),
    }

    stack_pointer = "sp"

    def get_instruction_info(self, data, addr):
        """Return the length and branch behaviour of the instruction."""
        instruction = disassemble_one(data, addr)

        result = InstructionInfo()
        result.length = instruction.size
        if instruction.name == "JUMP":
            result.add_branch(BranchType.UnresolvedBranch)
        elif instruction.name == "JUMPI":
            result.add_branch(BranchType.UnresolvedBranch)
            result.add_branch(BranchType.FalseBranch, addr + 1)
        elif instruction.name in ('RETURN', 'REVERT', 'SUICIDE', 'INVALID',
                                  'STOP', 'SELFDESTRUCT'):
            result.add_branch(BranchType.FunctionReturn)

        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)``; PUSH* also shows its operand in hex."""
        instruction = disassemble_one(data, addr)

        tokens = []
        tokens.append(
            InstructionTextToken(
                InstructionTextTokenType.TextToken,
                "{:7} ".format(
                    instruction.name
                )
            )
        )

        if instruction.name.startswith('PUSH'):
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.IntegerToken,
                    '#{:0{i.operand_size}x}'.format(
                        instruction.operand, i=instruction
                    ),
                    instruction.operand
                )
            )

        return tokens, instruction.size

    def get_instruction_low_level_il(self, data, addr, il):
        """Append the instruction's IL to *il* and return its length.

        Instructions without a lifter in ``insn_il`` are modelled only by
        their stack effect: each popped value goes into a temporary
        register and each pushed value is ``unimplemented``.
        """
        instruction = disassemble_one(data, addr)

        ill = insn_il.get(instruction.name, None)
        if ill is None:

            for i in range(instruction.pops):
                il.append(
                    il.set_reg(ADDR_SIZE, LLIL_TEMP(i), il.pop(ADDR_SIZE))
                )

            for i in range(instruction.pushes):
                il.append(il.push(ADDR_SIZE, il.unimplemented()))

            il.append(il.nop())

            return instruction.size

        ils = ill(il, addr, instruction.operand)
        if isinstance(ils, list):
            # Append each produced expression (not the IL function itself).
            for expr in ils:
                il.append(expr)
        else:
            il.append(ils)

        return instruction.size

    def assemble(self, code, addr=0):
        """Assemble *code*; return ``(bytes, '')`` or ``(None, error text)``."""
        try:
            return assemble(code, addr), ''
        except Exception as e:
            return None, str(e)
Esempio n. 23
0
class Subleq(Architecture):
    """Architecture description for the single-instruction subleq machine.

    Every instruction is three little-endian 32-bit words ``A``, ``B`` and
    ``C`` (12 bytes in total). Operands are word indices, so the byte address
    of an operand is its value multiplied by four. An instruction performs
    ``[B] = [B] - [A]`` and, when ``C`` is non-zero, jumps to ``C`` if
    ``[B] <= 0``. The ``A == B`` case is displayed and lifted as a clear of
    ``[B]``, with an unconditional jump when ``C`` is non-zero.
    """

    name = "subleq"
    address_size = 4
    default_int_size = 4
    max_instr_length = 12  # Each instruction is 3 dwords

    # SP register is required, even if we are not going to use it
    regs = {'sp': RegisterInfo('sp', 2)}
    stack_pointer = 'sp'

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch information for the instruction at addr.

        Returns ``None`` when fewer than 12 bytes are available. Otherwise
        returns an ``InstructionInfo`` of length 12 that carries an
        unconditional branch (``A == B``) or a true/false branch pair
        whenever ``C`` is non-zero.
        """
        # If we can't decode an instruction return None
        if len(data) < 12:
            return None

        # Unpack our operands from the data
        a, b, c = struct.unpack('<3I', data[:12])

        # Create the InstructionInfo object for our instruction
        res = InstructionInfo()
        res.length = 12

        if c != 0:
            if b == a:
                # Unconditional branch jumps to integer index c
                res.add_branch(BranchType.UnconditionalBranch, c * 4)
            else:
                # True branch jumps to integer index c
                res.add_branch(BranchType.TrueBranch, c * 4)
                # False branch continues to next instruction
                res.add_branch(BranchType.FalseBranch, addr + 12)

        return res

    def perform_get_instruction_text(self, data, addr):
        """Return the display tokens for the instruction at ``addr``.

        Returns ``None`` when fewer than 12 bytes are available, otherwise a
        ``(tokens, 12)`` pair. Instructions whose ``A`` or ``B`` byte address
        is at or above ``0x4400`` are rendered as ``invalid``.
        """
        # If we can't decode an instruction return None
        if len(data) < 12:
            return None

        # Unpack our operands from the data
        a, b, c = struct.unpack('<3I', data[:4 * 3])

        tokens = []

        # Check for invalid instructions that would crash
        if b * 4 >= 0x4400 or a * 4 >= 0x4400:
            tokens.append(makeToken('i', '{:7s}'.format('invalid')))
            return tokens, 4 * 3

        # Clear instruction to be less verbose
        # clear [B]
        if a == b:
            tokens.append(makeToken('i', '{:7s}'.format('clear')))
            tokens.append(makeToken('t', '['))
            tokens.append(makeToken('a', hexr(b * 4), b * 4))
            tokens.append(makeToken('t', ']'))

        # Normal sub instruction
        # sub [B], [A]
        else:
            tokens.append(makeToken('i', '{:7s}'.format('sub')))
            tokens.append(makeToken('t', '['))

            tokens.append(makeToken('a', hexr(b * 4), b * 4))
            tokens.append(makeToken('t', ']'))
            tokens.append(makeToken('s', ', '))
            tokens.append(makeToken('t', '['))
            tokens.append(makeToken('a', hexr(a * 4), a * 4))
            tokens.append(makeToken('t', ']'))

        # Unconditional jump
        # ; jmp C
        if c != 0 and b == a:
            tokens.append(makeToken('s', '; '))
            tokens.append(makeToken('i', '{:7s}'.format('jmp')))
            tokens.append(makeToken('a', hex(c * 4), c * 4))

        # Conditional jump
        # ; jmp C if [B] <= 0
        elif c != 0:
            tokens.append(makeToken('s', '; '))
            tokens.append(makeToken('i', '{:7s}'.format('jmp')))
            tokens.append(makeToken('a', hex(c * 4), c * 4))
            tokens.append(makeToken('s', ' if '))
            tokens.append(makeToken('t', '['))
            tokens.append(makeToken('a', hex(b * 4), b * 4))
            tokens.append(makeToken('t', ']'))
            tokens.append(makeToken('t', ' <= 0'))

        return tokens, 4 * 3

    # Full LLIL lifting for subleq
    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction at ``addr`` into the IL function ``il``.

        Returns ``None`` when fewer than 12 bytes are available, otherwise
        the instruction length (12). Instructions whose ``A`` or ``B`` byte
        address is at or above ``0x4400`` are lifted as a single nop.
        """
        # If we can't decode an instruction return None
        if len(data) < 12:
            return None

        # Unpack our operands from the data
        a, b, c = struct.unpack('<3I', data[:4 * 3])

        # If this instruction would crash, ignore it
        if b * 4 >= 0x4400 or a * 4 >= 0x4400:
            il.append(il.nop())
            return 4 * 3

        # A, B, and C as pointers. Each pointer must be built from its own
        # operand: B is both the destination of the store and the value
        # tested by the conditional jump.
        addr_a = il.const_pointer(4, a * 4)
        addr_b = il.const_pointer(4, b * 4)
        addr_c = il.const_pointer(4, c * 4)

        # mem[A] and mem[B] pointers
        mem_a = il.load(4, addr_a)
        mem_b = il.load(4, addr_b)

        # For a clear instruction just store 0
        if a == b:
            # *B = 0
            store_b = il.store(4, addr_b, il.const(4, 0))
            il.append(store_b)

        # For normal operation, construct a subtraction
        else:
            # *B = *B - *A
            sub_op = il.sub(4, mem_b, mem_a)
            store_b = il.store(4, addr_b, sub_op)
            il.append(store_b)

        # Unconditional jump
        if c != 0 and b == a:
            # goto C
            jmp = il.jump(addr_c)
            il.append(jmp)

        # Conditional jump
        elif c != 0:
            # See if we have marked the True jump target before
            t_target = il.get_label_for_address(
                Architecture['subleq'], il[il.const_pointer(4,
                                                            c * 4)].constant)

            # Create the False jump label
            f_target = LowLevelILLabel()

            # If we have to create a jump IL for the True target
            indirect = t_target is None
            if indirect:
                t_target = LowLevelILLabel()

            less_op = il.compare_signed_less_equal(4, mem_b, il.const(4, 0))
            if_op = il.if_expr(less_op, t_target, f_target)
            il.append(if_op)

            # We need to create a jump to the true target if it doesn't exist
            if indirect:
                il.mark_label(t_target)
                jmp = il.jump(addr_c)
                il.append(jmp)

            # Last is the fall through for the false target
            il.mark_label(f_target)

        return 12