Beispiel #1
0
class XtensaLE(Architecture):
	"""Little-endian Xtensa architecture backed by radare2.

	Disassembly text and ESIL are fetched from an r2pipe session; the ESIL is
	then translated to LLIL by a small stack machine (esil_to_llil).
	"""
	# NOTE: the attribute lines below were indented with spaces while the rest
	# of the class uses tabs; they are tab-indented now so the class body has
	# one consistent indentation (mixed tabs/spaces is a TabError on Python 3).
	name = "Xtensa LE"
	endianness = Endianness.LittleEndian
	address_size = 4
	default_int_size = 4
	instr_alignment = 1
	max_instr_length = 3

	# Include extra useless garbage in the LLIL
	# and also dump ESIL in the instruction
	VERBOSE_IL = False

	regs = {
		"pc": RegisterInfo("pc", 4),
		"sar": RegisterInfo("sar", 1), # actually 6 bits but whatever
		"lbegin": RegisterInfo("lbegin", 4),
		"lend": RegisterInfo("lend", 4),
		"lcount": RegisterInfo("lcount", 4),
		# ours actually just has the full number, not the log_2 of it
		"PS.CALLINC": RegisterInfo("PS.CALLINC", 1),
	}
	stack_pointer = 'a1' # Standard ABI
	# Note: not using a reg_stack because reg_stack is intended for x87/FPU;
	# it's not intended for windowed registers
	# General purpose registers a0..a15, 4 bytes each.
	for i in range(16):
		n = "a{0}".format(i)
		regs[n] = RegisterInfo(n, 4)

	intrinsics = {
		"memw": IntrinsicInfo([], []),
		"entry": IntrinsicInfo([], []),
	}

	# Mnemonics treated as conditional branches: the fixed list below plus
	# every "b" + {eq,ne,ge,lt} + {z,i,ui,""} combination.
	_branch_instrs = ["bbci", "bbsi", "bgeu", "bltu", "bany", "bnone", "ball", "bnall", "bbc", "bbs"]
	for operand in ["z", "i", "ui", ""]:
		for cmp in ["eq", "ne", "ge", "lt"]:
			_branch_instrs.append("b"+cmp+operand)

	# ESIL binary operator -> name of the LLIL builder method used for it.
	_esil_to_llil = {
		"-": "sub",
		"+": "add",
		"&": "and_expr",
		"|": "or_expr",
		"^": "xor_expr",
		">>": "logical_shift_right", #??
		"<<": "shift_left",
		"==": "compare_equal",
		">=": "compare_unsigned_greater_equal",
		"<=": "compare_unsigned_less_equal",
		">": "compare_unsigned_greater_than",
		"<": "compare_unsigned_less_than",
	}

	def __init__(self):
		"""Create the radare2 session, the command cache and the loop table."""
		super(XtensaLE, self).__init__()
		self.r2 = self._init_r2()
		# Memoised radare2 command output; accessed under _lock.
		self._lock = threading.Lock()
		self.cache = {}
		# Loop end address -> loop begin address; accessed under _looplock.
		self.loops = {}
		self._looplock = threading.Lock()
	def _init_r2(self):
		"""Open an r2pipe session on /dev/null and select the xtensa assembler."""
		session = r2pipe.open('/dev/null')
		session.cmd("e asm.arch=xtensa")
		return session
	def _r2_cache(self, cmd):
		with self._lock:
			if cmd in self.cache:
				return self.cache[cmd]
			res = self.r2.cmd(cmd)
			self.cache[cmd] = res
			return res
	def _inst_length(self, name):
		return 2 if name.endswith(".n") else 3
	def _get_asm(self, data, addr):
		asm = self._r2_cache("s {0}; pad {1}".format(addr, hexlify(data)))
		firstline = asm.strip().split("\n")[0].encode("ascii")
		if " " not in firstline:
			return firstline, []
		inst, args = firstline.split(" ", 1)
		inst = inst.lower()
		args = args.split(", ")
		return inst, args
	def _get_esil(self, data, addr):
		return self._r2_cache("s {0}; pade {1}".format(addr, hexlify(data))).strip().encode("ascii")
	def _get_reil(self, esil):
		return self._r2_cache("aetr '" + esil + "'")
	def get_instruction_info(self, data, addr):
		"""Report the length and branch targets of the instruction at addr.

		Returns None when radare2 disassembles the bytes as "ill"; otherwise
		an InstructionInfo whose length comes from _inst_length and whose
		branches describe jumps, calls, returns, loop setup and conditional
		branches. Branch targets are parsed from the operand text as base 16.
		"""
		inst, args = self._get_asm(data, addr)
		if inst == "ill":
			return None
		res = InstructionInfo()
		res.length = self._inst_length(inst)

		# This used to be `inst in ("jx")`, which is a substring test against
		# the string "jx" (no tuple) and therefore also matched "", "j" and
		# "x" -- an empty mnemonic then crashed on args[0]. "j" is still
		# handled identically by its own branch further down.
		if inst == "jx":
			if args[0] in self.regs:
				res.add_branch(BranchType.IndirectBranch)
			else:
				res.add_branch(BranchType.UnconditionalBranch, int(args[0], 16))
		elif inst in ("callx0", "callx4", "callx8", "callx12"):
			# Register-indirect call: destination unknown.
			res.add_branch(BranchType.CallDestination)
		elif inst in ("ret", "retw", "ret.n", "retw.n"):
			res.add_branch(BranchType.FunctionReturn)
		elif inst == "j":
			res.add_branch(BranchType.UnconditionalBranch, int(args[0], 16))
		elif inst in ("call0", "call4", "call8", "call12"):
			res.add_branch(BranchType.CallDestination, int(args[0], 16))
		elif inst in ("loopgtz", "loopnez"):
			# Conditional loops either skip to the loop end (second operand)
			# or fall into the loop body.
			res.add_branch(BranchType.FalseBranch, int(args[1], 16))
			res.add_branch(BranchType.TrueBranch, addr + res.length)
		elif inst in self._branch_instrs or (inst.endswith(".n") and inst[:-2] in self._branch_instrs):
			# The branch target is always the last operand.
			res.add_branch(BranchType.TrueBranch, int(args[-1], 16))
			res.add_branch(BranchType.FalseBranch, addr + res.length)
		return res
	def _decode_l32r(self, litbase, addr, bytes):
		a, b, c = tuple(ord(x) for x in reversed(str(bytes[0:3])))
		imm16 = (a << 8) + b
		t = (c >> 4)
		offset = (0x3FFF << 18) | (imm16 << 2)
		if LITBASE & 0x1:
			target = (LITBASE & 0xFFFFF000) + offset
		else:
			target = ((addr + 3) & 0xFFFFFFFC) + offset
		target = target % (1 << 32)
		return ("a{0}".format(t), target)
	def get_instruction_text(self, data, addr):
		"""Build the display tokens for the instruction at addr.

		Returns None for an "ill" instruction, otherwise a tuple
		(tokens, length). Operands starting with "0x" become address tokens,
		all-digit operands integer tokens and everything else register
		tokens. With VERBOSE_IL set, the ESIL string is appended as text.
		"""
		inst, args = self._get_asm(data, addr)
		if inst == "ill":
			return None
		# override buggy l32r in radare
		if inst == "l32r" and LITBASE & 0x1 == 1:
			_, target = self._decode_l32r(LITBASE, addr, data)
			# format() instead of hex(): hex() appends "L" to Python 2 longs,
			# which would end up in the displayed address.
			args[1] = "0x{0:x}".format(target)
		tokens = []
		tokens.append(makeToken("inst", inst))
		tokens.append(makeToken("sep", " "))
		for i, arg in enumerate(args):
			if i != 0:
				tokens.append(makeToken("sep", ", "))
			if arg.startswith("0x"):
				tokens.append(makeToken("addr", arg))
			elif arg.isdigit():
				tokens.append(makeToken("int", arg))
			else:
				tokens.append(makeToken("reg", arg))

		if self.VERBOSE_IL:
			esil = self._get_esil(data, addr)
			tokens.append(makeToken("sep", "    "))
			tokens.append(makeToken("text", "esil='"+esil+"'"))

		return tokens, self._inst_length(inst)
	def force_label(self, il, a):
		"""Return the IL label for address a, creating it if necessary.

		Looks the label up, asks il to add it when missing, and looks it up
		once more if the add call itself returned nothing. Returns None when
		no label is available even after adding; callers fall back to a plain
		jump in that case.

		The retry used to be an unconditional recursive call, which never
		terminated if the label stayed unavailable.
		"""
		t = il.get_label_for_address(self, a)
		if t is None:
			t = il.add_label_for_address(self, a)
			if t is None:
				# add_label_for_address did not hand the label back; fetch it.
				t = il.get_label_for_address(self, a)
		return t

	def goto_or_jmp(self, il, a):
		t = self.force_label(il, a)
		if t is None:
			il.append(il.jump(il.const_pointer(4, a)))
		else:
			il.append(il.goto(t))

	def get_instruction_low_level_il(self, data, addr, il):
		"""Lift the instruction at addr into il.

		Jumps, calls, returns, loop setup, "entry", "memw" and (when LITBASE
		bit 0 is set) "l32r" are lifted by hand. Everything else is
		translated from radare2's ESIL by esil_to_llil. If a previously
		lifted loop instruction registered the next address as its loop end,
		a conditional back-edge to the loop begin is appended.

		Returns the instruction length, or None for an "ill" instruction.
		"""
		inst, args = self._get_asm(data, addr)
		if inst == "ill":
			return None
		l = self._inst_length(inst)

		# This test used to be `inst in ("jx")`, a substring check against the
		# string "jx" that also matched "", "j" and "x". "j" is now listed
		# explicitly so it keeps being lifted through goto_or_jmp exactly as
		# before, while an empty mnemonic no longer crashes on args[0].
		if inst in ("j", "jx"):
			if args[0] in self.regs:
				il.append(il.jump(il.reg(4, args[0])))
			else:
				self.goto_or_jmp(il, int(args[0], 16))
			return l
		elif inst.startswith("call"):
			# callN / callxN: N is the register window increment.
			spilled_regs = int(inst[5 if inst.startswith("callx") else 4:])
			# Window spill/unspill and the return-address write are not
			# modelled (an earlier attempt was disabled); only the call
			# increment is recorded, and only in verbose mode.
			if spilled_regs != 0 and self.VERBOSE_IL:
				il.append(il.set_reg(1, "PS.CALLINC", il.const(1, spilled_regs)))
			target = il.reg(4, args[0]) if inst.startswith("callx") else il.const_pointer(4, int(args[0], 16))
			il.append(il.call(target))
			return l
		elif inst in ("ret", "retw", "ret.n", "retw.n"):
			il.append(il.ret(il.reg(4, "a0")))
			return l
		elif inst in ("loopgtz", "loopnez", "loop"):
			lbegin = addr + l
			lend = int(args[1], 16)
			r = il.reg(4, args[0])
			lcount = il.sub(4, r, il.const(4,1))
			# lend must come before lbegin for loop detection to work lower down
			if self.VERBOSE_IL:
				il.append(il.set_reg(4, "lend", il.const_pointer(4, lend)))
				il.append(il.set_reg(4, "lbegin", il.const_pointer(4, lbegin)))
				il.append(il.set_reg(4, "lcount", lcount))
			if inst in ("loopgtz", "loopnez"):
				t = self.force_label(il, lbegin)
				f = self.force_label(il, lend)
				set_t = False
				set_f = False
				if t is None:
					set_t = True
					t = LowLevelILLabel()
				if f is None:
					set_f = True
					f = LowLevelILLabel()
				# The loop body is entered when the condition holds and
				# skipped otherwise. The previous conditions were ">= 0"
				# (always true for the unsigned loopnez form) and let a zero
				# count enter the loop.
				if inst == "loopnez":
					cond = il.compare_not_equal(4, r, il.const(4, 0))
				else:
					cond = il.compare_signed_greater_than(4, r, il.const(4, 0))
				il.append(il.if_expr(cond, t, f))
				if set_f:
					il.mark_label(f)
					self.goto_or_jmp(il, lend)
				if set_t:
					il.mark_label(t)
					# fallthrough

			# Remember the loop so the instruction ending at lend can emit the
			# back-edge (see the end of this method).
			with self._looplock:
				self.loops[lend] = lbegin
			return l
		elif inst == "entry":
			# Entry doesn't *do* anything, basically
			il.append(il.intrinsic([], "entry", []))
			return l
		elif inst == "memw":
			il.append(il.intrinsic([], "memw", []))
			return l
		# override buggy l32r in radare
		elif inst == "l32r" and LITBASE & 0x1 == 1:
			a,b = self._decode_l32r(LITBASE, addr, data)
			il.append(il.set_reg(4, a, il.load(4, il.const_pointer(4, b))))
			return l

		esil = self._get_esil(data[0:l], addr)
		if esil == "":
			il.append(il.unimplemented())
			return l
		parts = esil.split(",")

		# For basic instructions, interpret the ESIL
		self.esil_to_llil(inst, parts, il, addr, l)

		# Scan the function for loop instructions pointing to here
		lbegin = None
		with self._looplock:
			n = addr + l
			if n in self.loops:
				lbegin = self.loops[n]
		if lbegin is not None:
			# Back-edge: continue at the loop begin while lcount > 0.
			cond = il.compare_unsigned_greater_than(4, il.reg(4, "lcount"), il.const(4, 0))
			f = self.force_label(il, n)
			t = self.force_label(il, lbegin) #il.get_label_for_address(self, lbegin)
			set_f = False
			set_t = False
			if f is None:
				set_f = True
				f = LowLevelILLabel()
			if t is None:
				set_t = True
				t = LowLevelILLabel()

			il.append(il.if_expr(cond, t, f))
			if set_t:
				il.mark_label(t)
				self.goto_or_jmp(il, lbegin)
			if set_f:
				il.mark_label(f)
				# fallthrough
		return l

	# Implement a basic stack machine to translate ESIL to LLIL
	def esil_to_llil(self, inst, parts, il, addr, l):
		"""Translate a list of ESIL tokens into LLIL appended to il.

		Implemented as a small stack machine over parts (the ESIL string
		split on ","). inst is the mnemonic, used for branch-offset fixups
		and error messages; addr and l are the instruction address and length.

		Raises ValueError for a store to a non-register destination, an
		invalid load size, or an unrecognised ESIL token.
		"""
		stack = []
		label_stack = []
		skip_to_close = False
		# pop for reading - interprets the PC register as
		# the value of the next instruction
		def popr():
			r = stack.pop()
			if r == "pc":
				return il.const_pointer(4, addr + l)
			return r
		for i, token in enumerate(parts):
			# Skip a trailing empty token (produced by a trailing comma)
			if token == "" and i == len(parts)-1:
				break
			# Inside a conditional whose target was already resolved:
			# ignore everything up to the closing brace.
			if skip_to_close and token != "}": continue
			if token == "$$":
				stack.append(il.const_pointer(4, addr))
				continue
			if token == "pc":
				stack.append("pc")
				continue
			if token in self.regs:
				stack.append(il.reg(4, token))
				continue
			if token in self._esil_to_llil:
				dst = popr()
				src = popr()
				stack.append(getattr(il, self._esil_to_llil[token])(4, dst, src))
				continue
			if token == "$z" or token == "!":
				# Note: the operand is read without being popped.
				stack.append(il.compare_equal(4, stack[-1], il.const(4, 0)))
				continue
			if token == "DUP":
				stack.append(stack[-1])
				continue
			if token == "=":
				dst = stack.pop()
				src = popr()
				if dst == "pc":
					srci = il[src]
					if srci.operation == LowLevelILOperation.LLIL_CONST:
						self.goto_or_jmp(il, srci.operands[0])
						continue
					il.append(il.jump(src))
					continue
				dst = il[dst]
				if dst.operation != LowLevelILOperation.LLIL_REG:
					raise ValueError("unimplemented il store to {0!r}".format(dst))
				il.append(il.set_reg(4, dst.operands[0].name, src))
				continue
			if token == "+=":
				dste = stack.pop()
				src = popr()
				if dste == "pc":
					srci = il[src]
					# Note in ESIL this is w.r.t. the *next* address
					# For narrow branch instructions, it calculates the pc relative
					# wrong in the ESIL and uses 3 bytes anyway
					# also, srci.operands[0] is 8 bytes *signed* but ESIL
					# doesn't seem to reflect this?
					# Note: except beqz, bnez, bgez, bltz which have 12 bytes *signed*
					# and beqz.n and bnez.n which are 4 bytes unsigned
					if srci.operation == LowLevelILOperation.LLIL_CONST:
						offset = srci.operands[0]
						if inst in ("beqz", "bnez", "bgez", "bltz"):
							if offset > (1 << 11) - 1:
								offset = ((1<<12)-offset) * -1
						elif inst in ("beqz.n", "bnez.n"): pass
						elif offset > 127:
							offset = (256-offset) * -1
						self.goto_or_jmp(il, offset + addr + 3)
					else:
						il.append(il.jump(il.add(4, il.const_pointer(4, addr + 3), src)))
					continue
				dst = il[dste]
				if dst.operation != LowLevelILOperation.LLIL_REG:
					raise ValueError("unimplemented il store to {0!r}".format(dst))
				il.append(il.set_reg(4, dst.operands[0].name, il.add(4, dste, src)))
				continue
			if token.startswith("=["):
				sz = int(token[2:-1])
				dst = popr()
				src = popr()
				il.append(il.store(sz, dst, src))
				continue
			if token.startswith("["):
				sz = int(token[1:-1])
				if sz == 1 or sz == 2:
					stack.append(il.zero_extend(4, il.load(sz, popr())))
				elif sz == 4:
					stack.append(il.load(4, popr()))
				else:
					raise ValueError("Invalid load size {0}".format(sz))
				continue
			# Base 16 constants
			# NOTE(review): every decimal literal also parses as hex, so an
			# unprefixed token such as "10" becomes 16 here and the base 10
			# branch below is never reached -- confirm against radare2's
			# ESIL output before changing the order.
			try:
				i = int(token, 16)
			except ValueError:
				pass
			else:
				stack.append(il.const(4, i))
				continue
			# Base 10 constants
			try:
				i = int(token)
			except ValueError:
				pass
			else:
				stack.append(il.const(4, i))
				continue

			# Hack to support branch instructions
			if token == "?{":
				t = None
				set_t = False
				end = parts.index("}", i+1)

				f = None
				# Don't create useless labels if this is at the end
				# of the instruction (e.g. a branch)
				if end == len(parts)-1:
					f = self.force_label(il, addr+l)
				if f is None:
					f = LowLevelILLabel()
					label_stack.append(f)

				inner = parts[i+1:end]

				# Dry-run the conditional body against a stand-in IL object
				# to find out whether it is a plain branch to a known target.
				fakeil = ThreaderILDuck()
				try:
					self.esil_to_llil(inst, inner, fakeil, addr, l)
				except AttributeError:
					# The stand-in does not support what the body needs:
					# no prediction, lift the body normally.
					pass
				except IndexError: # Tried to access the stack outside! Bad!
					pass
				except Exception as e:
					log.log_error("{0} {1}".format(e, inner))
					# bare raise keeps the original traceback
					raise
				else:
					if fakeil.target is not None:
						t = self.force_label(il, fakeil.target)

				if t is None:
					set_t = True
					t = LowLevelILLabel()

				il.append(il.if_expr(stack.pop(), t, f))
				if set_t:
					il.mark_label(t)
				elif len(label_stack) == 0:
					break
				else:
					skip_to_close = True
				continue

			if token == "}":
				if len(label_stack) == 0: break
				il.mark_label(label_stack.pop())
				skip_to_close = False
				continue

			# `esil` is not defined in this scope (it used to be referenced
			# here, turning this into a NameError); rebuild it from parts.
			raise ValueError("Unimplemented esil {0} in {1} for {2}".format(token, ",".join(parts), inst))
Beispiel #2
0
class Z80(Architecture):
	"""Z80 architecture: register tables plus disassembly through skwrapper.

	Instruction text and branch information are derived from the string
	returned by skwrapper.disasm; no IL lifting is provided.
	"""
	name = 'Z80'

	address_size = 2
	default_int_size = 1
	instr_alignment = 1
	max_instr_length = 4

	# register related stuff
	# NOTE(review): the RegisterInfo arguments look like
	# (containing register, size in bytes, byte offset) -- confirm against
	# the RegisterInfo definition.
	regs = {
		# main registers
		'AF': RegisterInfo('AF', 2),
		'BC': RegisterInfo('BC', 2),
		'DE': RegisterInfo('DE', 2),
		'HL': RegisterInfo('HL', 2),

		# alternate registers
		'AF_': RegisterInfo('AF_', 2),
		'BC_': RegisterInfo('BC_', 2),
		'DE_': RegisterInfo('DE_', 2),
		'HL_': RegisterInfo('HL_', 2),

		# main registers (sub)
		'A': RegisterInfo('AF', 1, 1),
		'B': RegisterInfo('BC', 1, 1),
		'C': RegisterInfo('BC', 1, 0),
		'D': RegisterInfo('DE', 1, 1),
		'E': RegisterInfo('DE', 1, 0),
		'H': RegisterInfo('HL', 1, 1),
		'L': RegisterInfo('HL', 1, 0),
		'Flags': RegisterInfo('AF', 0),

		# alternate registers (sub)
		'A_': RegisterInfo('AF_', 1, 1),
		'B_': RegisterInfo('BC_', 1, 1),
		'C_': RegisterInfo('BC_', 1, 0),
		'D_': RegisterInfo('DE_', 1, 1),
		'E_': RegisterInfo('DE_', 1, 0),
		'H_': RegisterInfo('HL_', 1, 1),
		'L_': RegisterInfo('HL_', 1, 0),
		'Flags_': RegisterInfo('AF_', 0),

		# index registers
		'IX': RegisterInfo('IX', 2),
		'IY': RegisterInfo('IY', 2),
		'SP': RegisterInfo('SP', 2),

		# other registers
		'I': RegisterInfo('I', 1),
		'R': RegisterInfo('R', 1),

		# program counter
		'PC': RegisterInfo('PC', 2),

		# status
		'status': RegisterInfo('status', 1)
	}

	stack_pointer = "SP"

	# internal
	# Spellings used to classify disassembly atoms in get_instruction_text.
	# Note these use the "'" suffix for alternate registers, whereas the regs
	# table above uses a "_" suffix.
	cond_strs = ['C', 'NC', 'Z', 'NZ', 'M', 'P', 'PE', 'PO']
	reg8_strs = list('ABDHCELIR') + ['A\'', 'B\'', 'C\'', 'D\'', 'E\'', 'H\'', 'L\'', 'Flags', 'Flags\'', 'IXh', 'IXl', 'IYh', 'IYl']
	reg16_strs = ['AF', 'BC', 'DE', 'HL', 'AF', 'AF\'', 'BC\'', 'DE\'', 'HL\'', 'IX', 'IY', 'SP', 'PC']
	reg_strs = reg8_strs + reg16_strs

	def get_instruction_info(self, data, addr):
		"""Describe the length and branch behaviour of the instruction at addr.

		The instruction is disassembled with skwrapper and its text is matched
		against a list of patterns; the first matching pattern decides which
		branches are reported. Returns None when the disassembler reports a
		zero length.
		"""
		(instrTxt, instrLen) = skwrapper.disasm(data, addr)
		if instrLen == 0:
			return None
		result = InstructionInfo()
		result.length = instrLen

		rccs = r'(?:C|NC|Z|NZ|M|P|PE|PO)'
		# The position of each pattern in this list is what the dispatch
		# below keys on, so keep the order intact.
		regexes = [
			r'^(?:JP|JR) '+rccs+r',\$(.*)$',   # 0: conditional jump, eg: JP PE,#DEAD
			r'^(?:JP|JR) \$(.*)$',             # 1: unconditional jump, eg: JP #DEAD
			r'^(?:JP|JR) \((?:HL|IX|IY)\)$',   # 2: unconditional indirect, eg: JP (IX)
			r'^DJNZ \$(.*)$',                  # 3: dec, jump if not zero, eg: DJNZ #DEAD
			r'^CALL '+rccs+r',\$(.*)$',        # 4: conditional call, eg: CALL PE,#DEAD
			r'^CALL \$(.*)$',                  # 5: unconditional call, eg: CALL #DEAD
			r'^RET '+rccs+'$',                 # 6: conditional return
			r'^(?:RET|RETN|RETI)$',            # 7: return, return (nmi), return (interrupt)
		]

		for kind, pattern in enumerate(regexes):
			found = re.match(pattern, instrTxt)
			if found is None:
				continue

			if kind in (0, 3):
				# Taken to the parsed target, otherwise falls through.
				target = int(found.group(1), 16)
				result.add_branch(BranchType.TrueBranch, target)
				result.add_branch(BranchType.FalseBranch, addr + instrLen)
			elif kind == 1:
				target = int(found.group(1), 16)
				result.add_branch(BranchType.UnconditionalBranch, target)
			elif kind == 2:
				result.add_branch(BranchType.IndirectBranch)
			elif kind in (4, 5):
				target = int(found.group(1), 16)
				result.add_branch(BranchType.CallDestination, target)
			elif kind == 7:
				result.add_branch(BranchType.FunctionReturn)
			# kind 6: conditional returns don't end the block, nothing to add

			# Only the first matching pattern is considered.
			break

		return result

	def get_instruction_text(self, data, addr):
		"""Tokenise the disassembly of the instruction at addr.

		The text from skwrapper.disasm is split on ",", space, parentheses
		and "+" (delimiters are kept), and each piece is classified as
		mnemonic, register, condition code, integer, possible address, memory
		operand bracket or separator.

		Returns None when the disassembler reports a zero length, otherwise
		(tokens, length). Raises Exception for a piece that fits no category.
		"""
		(instrTxt, instrLen) = skwrapper.disasm(data, addr)
		if instrLen == 0:
			return None

		result = []
		atoms = [t for t in re.split(r'([, ()\+])', instrTxt) if t] # delimiters kept if in capture group
		result.append(InstructionTextToken(InstructionTextTokenType.InstructionToken, atoms[0]))
		if atoms[1:]:
			result.append(InstructionTextToken(InstructionTextTokenType.TextToken, ' '))

		for atom in atoms[1:]:
			if not atom or atom == ' ':
				continue
			# PROBLEM: cond 'C' conflicts with register C
			# eg: "RET C" is it "RET <reg>" or "RET <cc>" ?
			# eg: "CALL C" is it "CALL <reg>" or "CALL C,$0000" ?
			elif atom == 'C' and atoms[0] in ['CALL','RET']:
				# flag, condition code
				result.append(InstructionTextToken(InstructionTextTokenType.TextToken, atom))
			elif atom in self.reg16_strs or atom in self.reg8_strs:
				result.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, atom))
			elif atom in self.cond_strs:
				result.append(InstructionTextToken(InstructionTextTokenType.TextToken, atom))
			elif atom[0] == '#':
				result.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, atom, int(atom[1:],16)))
			elif atom[0] == '$':
				# "$" plus four hex digits is treated as a possible address,
				# any other length as a plain integer.
				if len(atom)==5:
					result.append(InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, atom, int(atom[1:],16)))
				else:
					result.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, atom, int(atom[1:],16)))
			elif atom.isdigit():
				result.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, atom, int(atom)))
			elif atom == '(':
				result.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, atom))
			elif atom == ')':
				result.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, atom))
			elif atom == '+':
				result.append(InstructionTextToken(InstructionTextTokenType.TextToken, atom))
			elif atom == ',':
				result.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, atom))
			else:
				# Report the offending atom; this used to reference an
				# undefined name `tok` and failed with a NameError instead.
				raise Exception('unfamiliar token: %s from instruction %s' % (atom, instrTxt))

		return result, instrLen

	def get_instruction_low_level_il(self, data, addr, il):
		return None
class MCS48_Base(Architecture):

    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 2

    regs = {
        'A': RegisterInfo('A', 1),
        'T': RegisterInfo('T', 1),
        'PSW': RegisterInfo('PSW', 1),
        'SP': RegisterInfo('SP', 1),
    }
    if WREG_REG:
        for reg in range(8):
            regs['R{}'.format(reg)] = RegisterInfo('R{}'.format(reg), 1)
        for reg in range(8):
            regs['R{}\''.format(reg)] = RegisterInfo('R{}\''.format(reg), 1)

    stack_pointer = 'SP'
    global_regs = ['T', 'PSW']

    # PSW: CY, AC, F0, BS, 1, S2, S1, S0
    # carry, aux carry, flag 0, bank select, stack pointer
    # BS:0, addr=0, BS:1, addr=24

    flags = [
        'CY', # carry
        'AC', # auxiliary carry
        'F0', # flag 0
        'BS', # bank switch

        'DBF',
        'F1', # flag 1

        'T0', # test 0
        'T1', # test 1
        'TF', # timer flag
        'INT' # interrupt
    ]

    # The first flag write type is ignored currently.
    # See: https://github.com/Vector35/binaryninja-api/issues/513
    flag_write_types = ['', 'C']
    flags_written_by_flag_write_type = {
        'C': ['CY'],
    }
    flag_roles = {
        'CY': FlagRole.CarryFlagRole,
        'AC': FlagRole.HalfCarryFlagRole,
        'F0': FlagRole.SpecialFlagRole,
        'BS': FlagRole.SpecialFlagRole,

        'DBF': FlagRole.SpecialFlagRole,
        'F1': FlagRole.SpecialFlagRole,

        'T0': FlagRole.SpecialFlagRole,
        'T1': FlagRole.SpecialFlagRole,
        'TF': FlagRole.SpecialFlagRole,
        'INT': FlagRole.SpecialFlagRole,
    }
    #flags_required_for_flag_condition = {}

    instructions = [
        # 0x00-0x0f
        [('NOP', 1), [],                lambda self, il: il.nop()],
        None,
        [('OUTL', 1), ['BUS', 'A'],     lambda self, il: il.reg(1, 'A')], # dummy read
        [('ADD', 2), ['A', '#IMM8'],    lambda self, il, imm: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), il.const(1, imm), 'C'))],
        [('JMP', 2), ['ADDR11'],        lambda self, il, imm: branch(il, CODE_ADDR(0x000, imm))],
        [('EN', 1), ['I']],
        None,
        [('DEC', 1), ['A'],             lambda self, il: il.set_reg(1, 'A', il.sub(1, il.reg(1, 'A'), il.const(1, 1)))],
        [('INS', 1), ['A', 'BUS'],      lambda self, il: il.set_reg(1, 'A', il.unimplemented())],
        [('IN', 1), ['A', 'P1'],        lambda self, il: il.set_reg(1, 'A', il.unimplemented())],
        [('IN', 1), ['A', 'P2'],        lambda self, il: il.set_reg(1, 'A', il.unimplemented())],
        None,
        [('MOVD', 1), ['A', 'P4'],      lambda self, il: il.set_reg(1, 'A', il.unimplemented())],
        [('MOVD', 1), ['A', 'P5'],      lambda self, il: il.set_reg(1, 'A', il.unimplemented())],
        [('MOVD', 1), ['A', 'P6'],      lambda self, il: il.set_reg(1, 'A', il.unimplemented())],
        [('MOVD', 1), ['A', 'P7'],      lambda self, il: il.set_reg(1, 'A', il.unimplemented())],
        # 0x10-0x1f
        [('INC', 1), ['@R0'],           lambda self, il: il.store(1, self.wreg_get(il, 0), il.add(1, il.load(1, self.wreg_get(il, 0)), il.const(1, 1)))],
        [('INC', 1), ['@R1'],           lambda self, il: il.store(1, self.wreg_get(il, 1), il.add(1, il.load(1, self.wreg_get(il, 1)), il.const(1, 1)))],
        [('JB0', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 0)],
        [('ADDC', 2), ['A', '#IMM8'],   lambda self, il, imm: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), il.const(1, imm), il.flag('CY'), 'C'))],
        [('CALL', 2), ['ADDR11'],       lambda self, il, imm: call_helper(il, CODE_ADDR(0x000, imm))],
        [('DIS', 1), ['I']],
        [('JTF', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'TF', 1)],
        [('INC', 1), ['A'],             lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), il.const(1, 1)))],
        [('INC', 1), ['R0'],            lambda self, il: self.wreg_set(il, 0, il.add(1, self.wreg_get(il, 0), il.const(1, 1)))],
        [('INC', 1), ['R1'],            lambda self, il: self.wreg_set(il, 1, il.add(1, self.wreg_get(il, 1), il.const(1, 1)))],
        [('INC', 1), ['R2'],            lambda self, il: self.wreg_set(il, 2, il.add(1, self.wreg_get(il, 2), il.const(1, 1)))],
        [('INC', 1), ['R3'],            lambda self, il: self.wreg_set(il, 3, il.add(1, self.wreg_get(il, 3), il.const(1, 1)))],
        [('INC', 1), ['R4'],            lambda self, il: self.wreg_set(il, 4, il.add(1, self.wreg_get(il, 4), il.const(1, 1)))],
        [('INC', 1), ['R5'],            lambda self, il: self.wreg_set(il, 5, il.add(1, self.wreg_get(il, 5), il.const(1, 1)))],
        [('INC', 1), ['R6'],            lambda self, il: self.wreg_set(il, 6, il.add(1, self.wreg_get(il, 6), il.const(1, 1)))],
        [('INC', 1), ['R7'],            lambda self, il: self.wreg_set(il, 7, il.add(1, self.wreg_get(il, 7), il.const(1, 1)))],
        # 0x20-0x2f
        [('XCH', 1), ['A', '@R0'],      lambda self, il: [
            il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), 
            il.set_reg(1, 'A', il.load(1, self.wreg_get(il, 0))), 
            self.wreg_set(il, 0, il.reg(1, LLIL_TEMP(1)))
        ]],
        [('XCH', 1), ['A', '@R1'],      lambda self, il: [
            il.set_reg(1, LLIL_TEMP(1), 
            il.reg(1, 'A')), il.set_reg(1, 'A', il.load(1, self.wreg_get(il, 1))), 
            self.wreg_set(il, 1, il.reg(1, LLIL_TEMP(1)))
        ]],
        None,
        [('MOV', 2), ['A', '#IMM8'],    lambda self, il, imm: il.set_reg(1, 'A', il.const(1, imm))],
        [('JMP', 2), ['ADDR11'],        lambda self, il, imm: branch(il, CODE_ADDR(0x100, imm))],
        [('EN', 1), ['TCNTI']],
        [('JNT0', 2), ['ADDR8'],        lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'T0', 0)],
        [('CLR', 1), ['A'],             lambda self, il: il.set_reg(1, 'A', il.const(1, 0))],
        [('XCH', 1), ['A', 'R0'],       lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 0)), self.wreg_set(il, 0, il.reg(1, LLIL_TEMP(1)))]],
        [('XCH', 1), ['A', 'R1'],       lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 1)), self.wreg_set(il, 1, il.reg(1, LLIL_TEMP(1)))]],
        [('XCH', 1), ['A', 'R2'],       lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 2)), self.wreg_set(il, 2, il.reg(1, LLIL_TEMP(1)))]],
        [('XCH', 1), ['A', 'R3'],       lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 3)), self.wreg_set(il, 3, il.reg(1, LLIL_TEMP(1)))]],
        [('XCH', 1), ['A', 'R4'],       lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 4)), self.wreg_set(il, 4, il.reg(1, LLIL_TEMP(1)))]],
        [('XCH', 1), ['A', 'R5'],       lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 5)), self.wreg_set(il, 5, il.reg(1, LLIL_TEMP(1)))]],
        [('XCH', 1), ['A', 'R6'],       lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 6)), self.wreg_set(il, 6, il.reg(1, LLIL_TEMP(1)))]],
        [('XCH', 1), ['A', 'R7'],       lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 7)), self.wreg_set(il, 7, il.reg(1, LLIL_TEMP(1)))]],
        # 0x30-0x3f
        [('XCHD', 1), ['A', '@R0']],
        [('XCHD', 1), ['A', '@R1']],
        [('JB1', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 1)],
        None,
        [('CALL', 2), ['ADDR11'],       lambda self, il, imm: call_helper(il, CODE_ADDR(0x100, imm))],
        [('DIS', 1), ['TCNTI']],
        [('JT0', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'T0', 1)],
        [('CPL', 1), ['A'],             lambda self, il: il.set_reg(1, 'A', il.not_expr(1, il.reg(1, 'A')))],
        None,
        [('OUTL', 1), ['P1', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('OUTL', 1), ['P2', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        None,
        [('MOVD', 1), ['P4', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('MOVD', 1), ['P5', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('MOVD', 1), ['P6', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('MOVD', 1), ['P7', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        # 0x40-0x4f
        [('ORL', 1), ['A', '@R0'],      lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0))))],
        [('ORL', 1), ['A', '@R1'],      lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1))))],
        [('MOV', 1), ['A', 'T'],        lambda self, il: il.set_reg(1, 'A', il.reg(1, 'T'))],
        [('ORL', 2), ['A', '#IMM8'],    lambda self, il, imm: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), il.const(1, imm)))],
        [('JMP', 2), ['ADDR11'],        lambda self, il, imm: branch(il, CODE_ADDR(0x200, imm))],
        [('STRT', 1), ['CNT']],
        [('JNT1', 2), ['ADDR8'],        lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'T1', 0)],
        [('SWAP', 1), ['A'],            lambda self, il: il.set_reg(1, 'A', il.rotate_left(1, il.reg(1, 'A'), il.const(1, 4)))],
        [('ORL', 1), ['A', 'R0'],       lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 0)))],
        [('ORL', 1), ['A', 'R1'],       lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 1)))],
        [('ORL', 1), ['A', 'R2'],		lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 2)))],
        [('ORL', 1), ['A', 'R3'],		lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 3)))],
        [('ORL', 1), ['A', 'R4'],		lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 4)))],
        [('ORL', 1), ['A', 'R5'],		lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 5)))],
        [('ORL', 1), ['A', 'R6'],		lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 6)))],
        [('ORL', 1), ['A', 'R7'],		lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 7)))],
        # 0x50-0x5f
        [('ANL', 1), ['A', '@R0'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0))))],
        [('ANL', 1), ['A', '@R1'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1))))],
        [('JB2', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 2)],
        [('ANL', 2), ['A', '#IMM8'],	lambda self, il, imm: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), il.const(1, imm)))],
        [('CALL', 2), ['ADDR11'],		lambda self, il, imm: call_helper(il, CODE_ADDR(0x200, imm))],
        [('STRT', 1), ['T'],            lambda self, il: il.reg(1, 'T')], # DUMMY
        [('JT1', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'T1', 1)],
        [('DA', 1), ['A']],
        [('ANL', 1), ['A', 'R0'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 0)))],
        [('ANL', 1), ['A', 'R1'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 1)))],
        [('ANL', 1), ['A', 'R2'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 2)))],
        [('ANL', 1), ['A', 'R3'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 3)))],
        [('ANL', 1), ['A', 'R4'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 4)))],
        [('ANL', 1), ['A', 'R5'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 5)))],
        [('ANL', 1), ['A', 'R6'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 6)))],
        [('ANL', 1), ['A', 'R7'],		lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 7)))],
        # 0x60-0x6f
        [('ADD', 1), ['A', '@R0'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0)), 'C'))],
        [('ADD', 1), ['A', '@R1'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1)), 'C'))],
        [('MOV', 1), ['T', 'A'],		lambda self, il: il.set_reg(1, 'T', il.reg(1, 'A'))],
        None,
        [('JMP', 2), ['ADDR11'],        lambda self, il, imm: branch(il, CODE_ADDR(0x300, imm))],
        [('STOP', 1), ['TCNT']],
        None,
        [('RRC', 1), ['A'],             lambda self, il: il.set_reg(1, 'A', il.rotate_right_carry(1, il.reg(1, 'A'), il.const(1, 1), il.flag('CY'), 'C'))],
        [('ADD', 1), ['A', 'R0'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 0), 'C'))],
        [('ADD', 1), ['A', 'R1'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 1), 'C'))],
        [('ADD', 1), ['A', 'R2'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 2), 'C'))],
        [('ADD', 1), ['A', 'R3'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 3), 'C'))],
        [('ADD', 1), ['A', 'R4'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 4), 'C'))],
        [('ADD', 1), ['A', 'R5'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 5), 'C'))],
        [('ADD', 1), ['A', 'R6'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 6), 'C'))],
        [('ADD', 1), ['A', 'R7'],		lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 7), 'C'))],
        # 0x70-0x7f
        [('ADDC', 1), ['A', '@R0'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0)), il.flag('CY'), 'C'))],
        [('ADDC', 1), ['A', '@R1'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1)), il.flag('CY'), 'C'))],
        [('JB3', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 3)],
        None,
        [('CALL', 2), ['ADDR11'],		lambda self, il, imm: call_helper(il, CODE_ADDR(0x300, imm))],
        [('ENT0', 1), ['CLK']],
        [('JF1', 2), ['ADDR8'],		    lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'F1', 1)],
        [('RR', 1), ['A'],		        lambda self, il: il.set_reg(1, 'A', il.rotate_right(1, il.reg(1, 'A'), il.const(1, 1)))],
        [('ADDC', 1), ['A', 'R0'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 0), il.flag('CY'), 'C'))],
        [('ADDC', 1), ['A', 'R1'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 1), il.flag('CY'), 'C'))],
        [('ADDC', 1), ['A', 'R2'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 2), il.flag('CY'), 'C'))],
        [('ADDC', 1), ['A', 'R3'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 3), il.flag('CY'), 'C'))],
        [('ADDC', 1), ['A', 'R4'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 4), il.flag('CY'), 'C'))],
        [('ADDC', 1), ['A', 'R5'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 5), il.flag('CY'), 'C'))],
        [('ADDC', 1), ['A', 'R6'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 6), il.flag('CY'), 'C'))],
        [('ADDC', 1), ['A', 'R7'],		lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 7), il.flag('CY'), 'C'))],
        # 0x80-0x8f
        [('MOVX', 1), ['A', '@R0']],
        [('MOVX', 1), ['A', '@R1']],
        None,
        [('RET', 1), [],                lambda self, il: ret_helper(il, False)],
        [('JMP', 2), ['ADDR11'],        lambda self, il, imm: branch(il, CODE_ADDR(0x400, imm))],
        [('CLR', 1), ['F0'],            lambda self, il: il.set_flag('F0', il.const(0, 0))],
        [('JNI', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'INT', 0)],
        None,
        [('ORL', 2), ['BUS', '#IMM8']],
        [('ORL', 2), ['P1', '#IMM8']],
        [('ORL', 2), ['P2', '#IMM8']],
        None,
        [('ORLD', 1), ['P4', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('ORLD', 1), ['P5', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('ORLD', 1), ['P6', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('ORLD', 1), ['P7', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        # 0x90-0x9f
        [('MOVX', 1), ['@R0', 'A']],
        [('MOVX', 1), ['@R1', 'A']],
        [('JB4', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 4)],
        [('RETR', 1), [],               lambda self, il: ret_helper(il, True)],
        [('CALL', 2), ['ADDR11'],       lambda self, il, imm: call_helper(il, CODE_ADDR(0x400, imm))],
        [('CPL', 1), ['F0'],            lambda self, il: il.set_flag('F0', il.not_expr(0, il.flag('F0')))],
        [('JNZ', 2), ['ADDR8'],         lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'NZ')],
        [('CLR', 1), ['C'],             lambda self, il: il.set_flag('CY', il.const(0, 0))],
        [('ANL', 2), ['BUS', '#IMM8']],
        [('ANL', 2), ['P1', '#IMM8']],
        [('ANL', 2), ['P2', '#IMM8']],
        None,
        [('ANLD', 1), ['P4', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('ANLD', 1), ['P5', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('ANLD', 1), ['P6', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        [('ANLD', 1), ['P7', 'A'],      lambda self, il: il.reg(1, 'A')], # dummy read
        # 0xa0-0xaf
        [('MOV', 1), ['@R0', 'A'],      lambda self, il: il.store(1, self.wreg_get(il, 0), il.reg(1, 'A'))],
        [('MOV', 1), ['@R1', 'A'],      lambda self, il: il.store(1, self.wreg_get(il, 1), il.reg(1, 'A'))],
        None,
        [('MOVP', 1), ['A', '@A'],      lambda self, il: il.set_reg(1, 'A', il.load(1, il.or_expr(2, il.const(2, CODE_ADDR(il.current_address + 1, 0)), il.reg(1, 'A'))))],
        [('JMP', 2), ['ADDR11'],        lambda self, il, imm: branch(il, CODE_ADDR(0x500, imm))],
        [('CLR', 1), ['F1'],            lambda self, il: il.set_flag('F1', il.const(0, 0))],
        None,
        [('CPL', 1), ['C'],		        lambda self, il: il.set_flag('CY', il.not_expr(0, il.flag('CY')))],
        [('MOV', 1), ['R0', 'A'],		lambda self, il: self.wreg_set(il, 0, il.reg(1, 'A'))],
        [('MOV', 1), ['R1', 'A'],		lambda self, il: self.wreg_set(il, 1, il.reg(1, 'A'))],
        [('MOV', 1), ['R2', 'A'],		lambda self, il: self.wreg_set(il, 2, il.reg(1, 'A'))],
        [('MOV', 1), ['R3', 'A'],		lambda self, il: self.wreg_set(il, 3, il.reg(1, 'A'))],
        [('MOV', 1), ['R4', 'A'],		lambda self, il: self.wreg_set(il, 4, il.reg(1, 'A'))],
        [('MOV', 1), ['R5', 'A'],		lambda self, il: self.wreg_set(il, 5, il.reg(1, 'A'))],
        [('MOV', 1), ['R6', 'A'],		lambda self, il: self.wreg_set(il, 6, il.reg(1, 'A'))],
        [('MOV', 1), ['R7', 'A'],		lambda self, il: self.wreg_set(il, 7, il.reg(1, 'A'))],
        # 0xb0-0xbf
        [('MOV', 2), ['@R0', '#IMM8'],	lambda self, il, imm: il.store(1, self.wreg_get(il, 0), il.const(1, imm))],
        [('MOV', 2), ['@R1', '#IMM8'],	lambda self, il, imm: il.store(1, self.wreg_get(il, 1), il.const(1, imm))],
        [('JB5', 2), ['ADDR8'],		    lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 5)],
        [('JMPP', 1), ['@A'],		    lambda self, il: il.jump(il.or_expr(2, il.const(2, CODE_ADDR(il.current_address, 0)), il.reg(1, 'A')))], # FIXME: addr + 1?
        [('CALL', 2), ['ADDR11'],		lambda self, il, imm: call_helper(il, CODE_ADDR(0x500, imm))],
        [('CPL', 1), ['F1'],		    lambda self, il: il.set_flag('F1', il.not_expr(0, il.flag('F1')))],
        [('JF0', 2), ['ADDR8'],		    lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'F0', 1)],
        None,
        [('MOV', 2), ['R0', '#IMM8'],   lambda self, il, imm: self.wreg_set(il, 0, il.const(1, imm))],
        [('MOV', 2), ['R1', '#IMM8'],   lambda self, il, imm: self.wreg_set(il, 1, il.const(1, imm))],
        [('MOV', 2), ['R2', '#IMM8'],   lambda self, il, imm: self.wreg_set(il, 2, il.const(1, imm))],
        [('MOV', 2), ['R3', '#IMM8'],   lambda self, il, imm: self.wreg_set(il, 3, il.const(1, imm))],
        [('MOV', 2), ['R4', '#IMM8'],   lambda self, il, imm: self.wreg_set(il, 4, il.const(1, imm))],
        [('MOV', 2), ['R5', '#IMM8'],   lambda self, il, imm: self.wreg_set(il, 5, il.const(1, imm))],
        [('MOV', 2), ['R6', '#IMM8'],   lambda self, il, imm: self.wreg_set(il, 6, il.const(1, imm))],
        [('MOV', 2), ['R7', '#IMM8'],   lambda self, il, imm: self.wreg_set(il, 7, il.const(1, imm))],
        # 0xc0-0xcf
        None,
        None,
        None,
        None,
        [('JMP', 2), ['ADDR11'],		lambda self, il, imm: branch(il, CODE_ADDR(0x600, imm))],
        [('SEL', 1), ['RB0'],           lambda self, il: il.set_flag('BS', il.const(0, 0))],
        [('JZ', 2), ['ADDR8'],		    lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'Z')],
        [('MOV', 1), ['A', 'PSW'],		lambda self, il: il.set_reg(1, 'A', il.reg(1, 'PSW'))],
        [('DEC', 1), ['R0'],		    lambda self, il: self.wreg_set(il, 0, il.sub(1, self.wreg_get(il, 0), il.const(1, 1)))],
        [('DEC', 1), ['R1'],		    lambda self, il: self.wreg_set(il, 1, il.sub(1, self.wreg_get(il, 1), il.const(1, 1)))],
        [('DEC', 1), ['R2'],		    lambda self, il: self.wreg_set(il, 2, il.sub(1, self.wreg_get(il, 2), il.const(1, 1)))],
        [('DEC', 1), ['R3'],		    lambda self, il: self.wreg_set(il, 3, il.sub(1, self.wreg_get(il, 3), il.const(1, 1)))],
        [('DEC', 1), ['R4'],		    lambda self, il: self.wreg_set(il, 4, il.sub(1, self.wreg_get(il, 4), il.const(1, 1)))],
        [('DEC', 1), ['R5'],		    lambda self, il: self.wreg_set(il, 5, il.sub(1, self.wreg_get(il, 5), il.const(1, 1)))],
        [('DEC', 1), ['R6'],		    lambda self, il: self.wreg_set(il, 6, il.sub(1, self.wreg_get(il, 6), il.const(1, 1)))],
        [('DEC', 1), ['R7'],		    lambda self, il: self.wreg_set(il, 7, il.sub(1, self.wreg_get(il, 7), il.const(1, 1)))],
        # 0xd0-0xdf
        [('XRL', 1), ['A', '@R0'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0))))],
        [('XRL', 1), ['A', '@R1'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1))))],
        [('JB6', 2), ['ADDR8'],		    lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 6)],
        [('XRL', 2), ['A', '#IMM8'],	lambda self, il, imm: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), il.const(1, imm)))],
        [('CALL', 2), ['ADDR11'],		lambda self, il, imm: call_helper(il, CODE_ADDR(0x600, imm))],
        [('SEL', 1), ['RB1'],           lambda self, il: il.set_flag('BS', il.const(0, 1))],
        None,
        [('MOV', 1), ['PSW', 'A'],      lambda self, il: il.set_reg(1, 'PSW', il.reg(1, 'A'))], # TODO: set/clear flags
        [('XRL', 1), ['A', 'R0'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 0)))],
        [('XRL', 1), ['A', 'R1'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 1)))],
        [('XRL', 1), ['A', 'R2'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 2)))],
        [('XRL', 1), ['A', 'R3'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 3)))],
        [('XRL', 1), ['A', 'R4'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 4)))],
        [('XRL', 1), ['A', 'R5'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 5)))],
        [('XRL', 1), ['A', 'R6'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 6)))],
        [('XRL', 1), ['A', 'R7'],		lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 7)))],
        # 0xe0-0xef
        None,
        None,
        None,
        [('MOVP3', 1), ['A', '@A'],		lambda self, il: il.set_reg(1, 'A', il.load(1, il.or_expr(2, il.const(2, CODE_ADDR(0x300, 0)), il.reg(1, 'A'))))],
        [('JMP', 2), ['ADDR11'],		lambda self, il, imm: branch(il, CODE_ADDR(0x700, imm))],
        [('SEL', 1), ['MB0'],           lambda self, il: il.set_flag('DBF', il.const(0, 0))],
        [('JNC', 2), ['ADDR8'],		   	lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'CY', 0)],
        [('RL', 1), ['A'],		       	lambda self, il: il.set_reg(1, 'A', il.rotate_left(1, il.reg(1, 'A'), il.const(1, 1)))],
        [('DJNZ', 2), ['R0', 'ADDR8'],	lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 0)],
        [('DJNZ', 2), ['R1', 'ADDR8'],	lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 1)],
        [('DJNZ', 2), ['R2', 'ADDR8'],	lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 2)],
        [('DJNZ', 2), ['R3', 'ADDR8'],	lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 3)],
        [('DJNZ', 2), ['R4', 'ADDR8'],	lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 4)],
        [('DJNZ', 2), ['R5', 'ADDR8'],	lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 5)],
        [('DJNZ', 2), ['R6', 'ADDR8'],	lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 6)],
        [('DJNZ', 2), ['R7', 'ADDR8'],	lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 7)],
        # 0xf0-0xff
        [('MOV', 1), ['A', '@R0'],		lambda self, il: il.set_reg(1, 'A', il.load(1, self.wreg_get(il, 0)))],
        [('MOV', 1), ['A', '@R1'],		lambda self, il: il.set_reg(1, 'A', il.load(1, self.wreg_get(il, 1)))],
        [('JB7', 2), ['ADDR8'],		   	lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 7)],
        None,
        [('CALL', 2), ['ADDR11'],		lambda self, il, imm: call_helper(il, CODE_ADDR(0x700, imm))],
        [('SEL', 1), ['MB1'],           lambda self, il: il.set_flag('DBF', il.const(0, 1))],
        [('JC', 2), ['ADDR8'],		    lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'CY', 1)],
        [('RLC', 1), ['A'],		        lambda self, il: il.set_reg(1, 'A', il.rotate_left_carry(1, il.reg(1, 'A'), il.const(1, 1), il.flag('CY')))],
        [('MOV', 1), ['A', 'R0'],		lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 0))],
        [('MOV', 1), ['A', 'R1'],		lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 1))],
        [('MOV', 1), ['A', 'R2'],		lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 2))],
        [('MOV', 1), ['A', 'R3'],		lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 3))],
        [('MOV', 1), ['A', 'R4'],		lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 4))],
        [('MOV', 1), ['A', 'R5'],		lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 5))],
        [('MOV', 1), ['A', 'R6'],		lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 6))],
        [('MOV', 1), ['A', 'R7'],		lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 7))],
    ]

    def get_instruction_info(self, data, addr):
        """Report the length and control-flow effect of the instruction at ``addr``.

        Args:
            data: raw instruction bytes; each byte is read with ``ord()``, so a
                byte string is expected.
            addr: address of the first byte of the instruction.

        Returns:
            An ``InstructionInfo`` with ``length`` set and any branches added,
            or ``None`` when nothing can be decoded: empty input, an opcode
            whose table entry is ``None``, or a two-byte instruction whose
            operand byte is missing.
        """
        # nothing to decode without at least the opcode byte
        if not data:
            return None

        # instruction lookup
        instruction = self.instructions[ord(data[0])]
        if instruction is None:
            return None

        (opcode, length) = instruction[0]

        # a two-byte instruction cut off at the end of the buffer cannot be
        # decoded; report that instead of raising IndexError on data[1] below
        if len(data) < length:
            return None

        result = InstructionInfo()
        result.length = length
        fallthrough = addr + length

        # add branches
        if opcode in ('RET', 'RETI', 'RETR'):
            result.add_branch(BranchType.FunctionReturn)
        elif opcode == 'JMP':
            # the top three opcode bits are shifted up to become the high
            # bits of the destination, the operand byte supplies the low bits
            # TODO: memory bank selection
            result.add_branch(BranchType.UnconditionalBranch, CODE_ADDR((ord(data[0]) & 0xe0) << 3, ord(data[1])))
        elif opcode == 'JMPP':
            # target depends on the accumulator at run time
            result.add_branch(BranchType.UnresolvedBranch)
        elif opcode == 'DJNZ' or opcode[0] == 'J':
            # conditional branches (JMP and JMPP were handled above)
            result.add_branch(BranchType.TrueBranch, CODE_ADDR(addr, ord(data[1])))
            result.add_branch(BranchType.FalseBranch, fallthrough)
        elif opcode == 'CALL':
            # TODO: memory bank selection
            result.add_branch(BranchType.CallDestination, CODE_ADDR((ord(data[0]) & 0xe0) << 3, ord(data[1])))
        elif opcode == 'SEL':
            # FIXME: fake branches to support bank switching -- execution
            # continues at the next instruction, but under the architecture
            # variant registered for the newly selected bank
            selector = instruction[1][0]
            if selector == 'RB0':
                result.add_branch(BranchType.UnconditionalBranch, fallthrough, Architecture['{}_rb{}mb{}'.format(self.device, 0, self.mb)])
            elif selector == 'RB1':
                result.add_branch(BranchType.UnconditionalBranch, fallthrough, Architecture['{}_rb{}mb{}'.format(self.device, 1, self.mb)])
            elif selector == 'MB0':
                result.add_branch(BranchType.UnconditionalBranch, fallthrough, Architecture['{}_rb{}mb{}'.format(self.device, self.rb, 0)])
            elif selector == 'MB1':
                result.add_branch(BranchType.UnconditionalBranch, fallthrough, Architecture['{}_rb{}mb{}'.format(self.device, self.rb, 1)])

        return result

    def get_instruction_text(self, data, addr):
        """Render the instruction at ``addr`` as disassembly tokens.

        Args:
            data: raw instruction bytes; each byte is read with ``ord()``, so a
                byte string is expected.
            addr: address of the first byte of the instruction.

        Returns:
            A ``(tokens, length)`` tuple, or ``None`` when nothing can be
            decoded: empty input, an opcode whose table entry is ``None``, or
            a two-byte instruction whose operand byte is missing.
        """
        # nothing to decode without at least the opcode byte
        if not data:
            return None

        # instruction lookup
        instruction = self.instructions[ord(data[0])]
        if instruction is None:
            return None

        (opcode, length) = instruction[0]

        # the operand byte must be present before data[1] is read below
        if len(data) < length:
            return None

        # opcode, padded to a fixed width of six characters
        tokens = [InstructionTextToken(InstructionTextTokenType.InstructionToken, '{:6}'.format(opcode))]

        # operands
        for operand in instruction[1]:
            # add a separator if needed (i.e. for every operand but the first)
            if len(tokens) > 1:
                tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ','))

            # append suffix for second bank working registers
            if self.rb == 1 and re.match(r'\@?R\d', operand) is not None:
                operand += '\''

            if operand == '#IMM8':
                immediate = ord(data[1])
                tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, '#{:X}H'.format(immediate), immediate))
            elif operand == 'ADDR8':
                # bits 8-11 of the instruction address plus the operand byte
                address = (addr & 0xf00) | ord(data[1])
                tokens.append(InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, '{:X}H'.format(address), CODE_ADDR(0, address)))
            elif operand == 'ADDR11':
                # top three opcode bits shifted up, operand byte as the low bits
                # TODO: memory bank selection
                address = ((ord(data[0]) & 0xe0) << 3) | ord(data[1])
                tokens.append(InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, '{:X}H'.format(address), CODE_ADDR(0, address)))
            elif operand in self.regs:
                tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, operand))
            elif operand[0] == '@' and operand[1:] in self.regs:
                # indirect operand: '@' marker followed by the register name
                tokens.append(InstructionTextToken(InstructionTextTokenType.InstructionToken, '@'))
                tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, operand[1:]))
            else:
                # anything else is shown verbatim
                tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, operand))

        return tokens, length

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction at ``addr`` into ``il``.

        Table entries with a third element carry a lifter callable; it is
        invoked as ``lifter(self, il)`` for one-byte instructions and as
        ``lifter(self, il, imm)`` otherwise, where ``imm`` is the operand byte.
        A lifter may return a single expression, a list of expressions
        (``None`` items are skipped) or ``None`` when it has nothing further
        to append. Entries without a lifter emit ``il.unimplemented()``.

        Args:
            data: raw instruction bytes; each byte is read with ``ord()``, so a
                byte string is expected.
            addr: address of the first byte of the instruction.
            il: IL builder the expressions are appended to.

        Returns:
            The instruction length, or ``None`` when nothing can be decoded:
            empty input, an opcode whose table entry is ``None``, or a
            two-byte instruction whose operand byte is missing.
        """
        # nothing to decode without at least the opcode byte
        if not data:
            return None

        # instruction lookup
        instruction = self.instructions[ord(data[0])]
        if instruction is None:
            return None

        _, length = instruction[0]

        # refuse truncated input before anything is appended to il, rather
        # than raising IndexError on data[1] below
        if len(data) < length:
            return None

        if len(instruction) != 3:
            # no lifter registered for this opcode
            il.append(il.unimplemented())
            return length

        lifter = instruction[2]

        # instructions are either one byte (opcode) or two bytes (opcode + immediate)
        if length == 1:
            lifted = lifter(self, il)
        else:
            lifted = lifter(self, il, ord(data[1]))

        if isinstance(lifted, list):
            for expr in lifted:
                if expr is not None:
                    il.append(expr)
        elif lifted is not None:
            il.append(lifted)

        return length

    def get_flag_write_low_level_il(self, op, size, write_type, flag, operands, il):
        """Return the IL expression giving the value written to ``flag`` by ``op``.

        Only the 'CY' flag of the two rotate-through-carry operations is
        handled here: the result is the one-byte register named by
        ``operands[0]`` masked with 0x01 for LLIL_RRC and with 0x80 for
        LLIL_RLC. Every other combination is passed on to
        ``Architecture.perform_get_flag_write_low_level_il``.
        """
        carry_mask = None
        if flag == 'CY':
            if op == LowLevelILOperation.LLIL_RRC:
                # mask 0x01 selects the lowest bit of the operand
                carry_mask = 0x01
            elif op == LowLevelILOperation.LLIL_RLC:
                # mask 0x80 selects the highest bit of the operand
                carry_mask = 0x80

        if carry_mask is None:
            return Architecture.perform_get_flag_write_low_level_il(self, op, size, write_type, flag, operands, il)

        source = il.reg(1, operands[0])
        return il.and_expr(1, source, il.const(1, carry_mask))

    def wreg_set(self, il, reg, expr):
        """Append to ``il`` a one-byte write of ``expr`` into working register ``reg``.

        With ``WREG_REG`` truthy the target is the named register ``R<reg>``
        when ``self.rb`` is 0 and ``R<reg>'`` otherwise. With it falsy the
        write is a store through a constant pointer: ``reg`` when ``self.rb``
        is 0, ``reg + 24`` otherwise. Nothing is returned.
        """
        if WREG_REG:
            if self.rb == 0:
                target = 'R{}'.format(reg)
            else:
                target = 'R{}\''.format(reg)
            write = il.set_reg(1, target, expr)
        else:
            if self.rb == 0:
                offset = reg
            else:
                offset = reg + 24
            write = il.store(1, il.const_pointer(1, offset), expr)

        il.append(write)

    def wreg_get(self, il, reg):
        """Return an IL expression reading one byte from working register ``reg``.

        With ``WREG_REG`` truthy the source is the named register ``R<reg>``
        when ``self.rb`` is 0 and ``R<reg>'`` otherwise. With it falsy the
        read is a load through a constant pointer: ``reg`` when ``self.rb``
        is 0, ``reg + 24`` otherwise. Nothing is appended to ``il``.
        """
        if WREG_REG:
            if self.rb == 0:
                source = 'R{}'.format(reg)
            else:
                source = 'R{}\''.format(reg)
            return il.reg(1, source)

        if self.rb == 0:
            offset = reg
        else:
            offset = reg + 24
        return il.load(1, il.const_pointer(1, offset))

    def djnz_helper(self, il, addr, reg):
        """Emit IL for "decrement working register ``reg``, jump to ``addr`` if non-zero".

        The decrement is written first, then an ``if`` on the register being
        non-zero. Existing labels for ``addr`` and for the following
        instruction (``il.current_address + 2``) are reused when ``il`` has
        them; otherwise fresh labels are created and marked right after the
        ``if``, with an explicit jump to ``addr`` on the taken side.
        """
        # reg <- reg - 1
        decremented = il.sub(1, self.wreg_get(il, reg), il.const(1, 1))
        self.wreg_set(il, reg, decremented)

        # taken side: reuse a known label or fall back to a fresh one
        taken_label = il.get_label_for_address(il.arch, addr)
        taken_is_new = taken_label is None
        if taken_is_new:
            taken_label = LowLevelILLabel()

        # untaken side: the instruction following this two-byte DJNZ
        next_label = il.get_label_for_address(il.arch, il.current_address + 2)
        next_is_new = next_label is None
        if next_is_new:
            next_label = LowLevelILLabel()

        # branch on the decremented register being non-zero
        still_counting = il.compare_not_equal(1, self.wreg_get(il, reg), il.const(1, 0))
        il.append(il.if_expr(still_counting, taken_label, next_label))

        # a fresh taken label needs a body: an explicit jump to the target
        if taken_is_new:
            il.mark_label(taken_label)
            il.append(il.jump(il.const(2, addr)))

        # a fresh untaken label simply marks where execution continues
        if next_is_new:
            il.mark_label(next_label)
Beispiel #4
0
class XTENSA(Architecture):
    """Architecture definition for Xtensa.

    Disassembly text and branch information are derived from the module-level
    ``decode()`` helper. Lifting is a placeholder: every decodable instruction
    produces a single ``unimplemented`` IL instruction.
    """

    name = 'XTENSA'

    address_size = 4
    default_int_size = 4
    # Instructions are two or three bytes long (see _try_decode), so they do
    # not fall on 3-byte boundaries; they are only byte aligned.
    instr_alignment = 1
    max_instr_length = 3

    # register related stuff
    # main registers a0..a15
    regs = {'a%d' % n: RegisterInfo('a%d' % n, 4) for n in range(16)}
    # program counter
    regs['pc'] = RegisterInfo('pc', 4)
    # special status
    regs['sar'] = RegisterInfo('sar', 4)

    stack_pointer = "a1"

    # mnemonic groups used to classify control flow
    _RETURNS = frozenset(["RET", "RET.N"])
    _CONDITIONAL_BRANCHES = frozenset([
        "BALL", "BNALL", "BANY", "BNONE", "BBC", "BBCI", "BBS", "BBSI",
        "BEQ", "BEQI", "BEQZ", "BNE", "BNEI", "BNEZ", "BGE", "BGEI",
        "BGEU", "BGEUI", "BGEZ", "BLT", "BLTI", "BLTU", "BLTUI", "BLTZ",
    ])
    _DIRECT_CALLS = frozenset(["CALL0", "CALL4", "CALL8", "CALL12"])

    # operand kind -> prefix printed in front of the register number
    _REGISTER_PREFIXES = {
        "TYPE_REG": "a",
        "TYPE_FREG": "f",
        "TYPE_BREG": "b",
        "TYPE_SREG": "s",
        "TYPE_UREG": "u",
        "TYPE_MREG": "m",
    }

    @staticmethod
    def _try_decode(data, addr):
        """Decode one instruction; return None if the bytes are unusable.

        Only buffers of two or three bytes are handed to ``decode()``, and a
        result named "UNKNOWN" is treated as undecodable.
        """
        if len(data) < 2 or len(data) > 3:
            return None
        obj = decode(data, addr)
        if obj.name == "UNKNOWN":
            return None
        return obj

    @staticmethod
    def _label_targets(obj):
        """Return the values of all TYPE_LABEL operands of a decoded instruction."""
        return [field[1] for field in obj.prop["format"] if field[0] == "TYPE_LABEL"]

    #------------------------------------------------------------------------------
    # CFG building
    #------------------------------------------------------------------------------

    def get_instruction_info(self, data, addr):
        """Return an InstructionInfo (length plus branches) for the instruction.

        Returns None when the bytes cannot be decoded.
        """
        obj = self._try_decode(data, addr)
        if obj is None:
            return None

        result = InstructionInfo()
        result.length = obj.len

        if obj.name in self._RETURNS:
            # RETURN
            result.add_branch(BranchType.FunctionReturn)
        elif obj.name in self._CONDITIONAL_BRANCHES:
            # CONDITIONAL BRANCH: label operand(s) when taken, next
            # instruction otherwise
            for target in self._label_targets(obj):
                result.add_branch(BranchType.TrueBranch, target)
            result.add_branch(BranchType.FalseBranch, addr + obj.len)
        elif obj.name == "J":
            # UNCONDITIONAL JUMP
            for target in self._label_targets(obj):
                result.add_branch(BranchType.UnconditionalBranch, target)
        elif obj.name in self._DIRECT_CALLS:
            # DIRECT CALL
            for target in self._label_targets(obj):
                result.add_branch(BranchType.CallDestination, target)
        elif obj.name == "JX":
            # UNCONDITIONAL JUMP TO REGISTER
            result.add_branch(BranchType.IndirectBranch)

        # NOTE: CALLX0/CALLX4/CALLX8/CALLX12 (calls through a register) add
        # no branch; marking them IndirectBranch was left disabled.

        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None if undecodable.

        The mnemonic is followed by the operands; each operand is preceded by
        a space and all but the last are followed by a comma.
        """
        obj = self._try_decode(data, addr)
        if obj is None:
            return None

        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 obj.name)
        ]

        operands = obj.prop["format"]
        last_index = len(operands) - 1
        for index, field in enumerate(operands):
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.OperandSeparatorToken, ' '))

            kind = field[0]
            if kind in self._REGISTER_PREFIXES:
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.RegisterToken,
                        self._REGISTER_PREFIXES[kind] + str(field[1])))
            elif kind == "TYPE_IMM":
                tokens.append(
                    InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                         str(field[1]), field[1]))
            elif kind == "TYPE_LABEL":
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.CodeRelativeAddressToken,
                        '0x%08x' % (field[1]), field[1]))  # PossibleAddressToken?

            if index < last_index:
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.OperandSeparatorToken, ','))

        return tokens, obj.len

    def get_flag_write_low_level_il(self, op, size, write_type, flag, operands,
                                    il):
        """Defer flag-write lifting to the base Architecture implementation."""
        return Architecture.get_flag_write_low_level_il(
            self, op, size, write_type, flag, operands, il)

    def get_instruction_low_level_il(self, data, addr, il):
        """Append a placeholder IL instruction and return the instruction length.

        Returns None when the bytes cannot be decoded.
        """
        obj = self._try_decode(data, addr)
        if obj is None:
            return None
        il.append(il.unimplemented())
        return obj.len
Beispiel #5
0
class VTIL(Architecture):
    """Architecture that renders VTIL instructions looked up by address.

    Nothing is decoded from the ``data`` bytes handed in by the caller: every
    query is resolved through ``find_instruction(addr, active_vtil_file)`` and
    every instruction is reported with a length of 1.
    """
    name = "VTIL"
    max_instr_length = 1
    stack_pointer = "$sp"

    regs = {
        "$sp" : RegisterInfo("$sp", 1)
    }

    # Token templates keyed by mnemonic. "operands" lists the indexes inside
    # "tokens" whose "UNKNOWN" placeholder is replaced, in order, by the
    # operands of the instruction being rendered. The templates are shared
    # class state and must never be modified in place.
    instructions = {
        "str": {
            "tokens": [
                InstructionTextToken(InstructionTextTokenType.InstructionToken, "str"),
                InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "),
                InstructionTextToken(InstructionTextTokenType.TextToken, "["),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "+"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "]"),
                InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ", "),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            ],
            "operands": [3, 5, 8]
        },
        "ldd": {
            "tokens": [
                InstructionTextToken(InstructionTextTokenType.InstructionToken, "ldd"),
                InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, ", ["),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "+"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "]"),
            ],
            "operands": [2, 4, 6]
        },
    }

    # The comparison instructions all render as "<name> dst := (lhs <op> rhs)"
    # and differ only in mnemonic and operator text, so they are generated.
    for _mnemonic, _symbol in (
        ("te", "=="),
        ("tne", "!="),
        ("tg", ">"),
        ("tge", ">="),
        ("tl", "<"),
        ("tle", "<="),
        ("tug", "u>"),
        ("tuge", "u>="),
        ("tul", "u<"),
        ("tule", "u<="),
    ):
        instructions[_mnemonic] = {
            "tokens": [
                InstructionTextToken(InstructionTextTokenType.InstructionToken, _mnemonic),
                InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, " := ("),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, f" {_symbol} "),
                InstructionTextToken(InstructionTextTokenType.TextToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, ")")
            ],
            "operands": [2, 4, 6]
        }
    # Keep the loop variables from lingering as class attributes.
    del _mnemonic, _symbol

    instructions["ifs"] = {
        "tokens": [
            InstructionTextToken(InstructionTextTokenType.InstructionToken, "ifs"),
            InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " := "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            InstructionTextToken(InstructionTextTokenType.TextToken, " ? "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            InstructionTextToken(InstructionTextTokenType.TextToken, " : "),
            InstructionTextToken(InstructionTextTokenType.IntegerToken, "0")
        ],
        "operands": [2, 4]
    }
    instructions["js"] = {
        "tokens": [
            InstructionTextToken(InstructionTextTokenType.InstructionToken, "js"),
            InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            InstructionTextToken(InstructionTextTokenType.TextToken, " ? "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            InstructionTextToken(InstructionTextTokenType.TextToken, " : "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN")
        ],
        "operands": [2, 4, 6]
    }

    def get_instruction_info(self, data, addr):
        """Report the length and outgoing branches of the instruction at ``addr``.

        The instruction text comes from ``find_instruction``; ``data`` is not
        inspected. Branches are added for ``js``, ``vxcall``, ``jmp`` and
        ``vexit``; any other instruction (or a failed lookup) yields a
        branch-free ``InstructionInfo`` of length 1.
        """
        result = InstructionInfo()
        result.length = 1

        next_vip, _, _, _, code = find_instruction(addr, active_vtil_file)
        if code is None:
            return result

        if code.startswith("js"):
            # "js <cond> <true vip> <false vip>", both targets written in hex.
            _, _, true_vip, false_vip = code.split(" ")
            true_addr = find_block_address(int(true_vip, 16), active_vtil_file)
            false_addr = find_block_address(int(false_vip, 16), active_vtil_file)
            result.add_branch(BranchType.TrueBranch, true_addr)
            result.add_branch(BranchType.FalseBranch, false_addr)
        elif code.startswith("vxcall"):
            target = find_block_address(next_vip[0], active_vtil_file)
            result.add_branch(BranchType.UnconditionalBranch, target)
        elif code.startswith("jmp"):
            if len(next_vip) == 1:
                target = find_block_address(next_vip[0], active_vtil_file)
                result.add_branch(BranchType.UnconditionalBranch, target)
            else:
                # Several successors: flag the jump as indirect and also list
                # every successor block that is known.
                result.add_branch(BranchType.IndirectBranch)
                for vip in next_vip:
                    result.add_branch(BranchType.UnconditionalBranch, find_block_address(vip, active_vtil_file))
        elif code.startswith("vexit"):
            result.add_branch(BranchType.FunctionReturn)

        return result

    def get_instruction_text(self, data, addr):
        """Build the display tokens for the instruction at ``addr``.

        Returns ``(tokens, 1)``. Each line starts with a stack-pointer index
        column and a stack-pointer offset column, followed by the instruction
        rendered either from its template in ``instructions`` or, for unknown
        mnemonics, as a comma-separated operand list. A failed lookup renders
        the single text token "ERROR".
        """
        tokens = []

        next_vip, sp_index, sp_reset, sp_offset, code = find_instruction(addr, active_vtil_file)
        if code is None:
            tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "ERROR"))
            return tokens, 1

        # Column 1: "[nn] " when a stack-pointer index is set, blank otherwise.
        if sp_index > 0:
            tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "["))
            tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, f"{int(sp_index):>2}", value=sp_index, size=64))
            tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "] "))
        else:
            tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "     "))

        # Column 2: signed stack-pointer offset, prefixed with ">" when
        # sp_reset is set and with a blank otherwise.
        sign = "+" if sp_offset >= 0 else "-"
        marker = ">" if sp_reset > 0 else " "
        txt = f"{marker}{sign}{hex(abs(sp_offset))}"
        tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, f"{txt:<6}"))
        tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, " "))

        if " " not in code:
            # Instruction without operands.
            tokens.append(InstructionTextToken(InstructionTextTokenType.InstructionToken, code))
            return tokens, 1

        instr, operand_text = code.split(" ", 1)
        operands = operand_text.split(" ")

        if instr in self.instructions:
            template = self.instructions[instr]
            # Copy the template before filling it in: it is shared class-level
            # state, and writing operands into it directly would leak one
            # instruction's operands into the template used by later (or
            # concurrent) calls.
            token_set = list(template["tokens"])

            for index in template["operands"]:
                operand = operands.pop(0)

                if "0x" in operand:
                    if instr == "js":
                        token_set[index] = InstructionTextToken(InstructionTextTokenType.GotoLabelToken, f"vip_{operand[2:]}")
                    elif instr == "jmp":
                        token_set[index] = InstructionTextToken(InstructionTextTokenType.GotoLabelToken, f"vip_{hex(next_vip[0])[2:]}")
                    else:
                        token_set[index] = InstructionTextToken(InstructionTextTokenType.IntegerToken, operand, value=int(operand, 16), size=64)
                else:
                    token_set[index] = InstructionTextToken(InstructionTextTokenType.RegisterToken, operand)

            tokens.extend(token_set)
        else:
            # Fallback for mnemonics without a template.
            tokens.append(InstructionTextToken(InstructionTextTokenType.InstructionToken, instr))
            tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "))

            for operand in operands:
                if "0x" in operand:
                    if instr == "jmp":
                        tokens.append(InstructionTextToken(InstructionTextTokenType.GotoLabelToken, f"vip_{hex(next_vip[0])[2:]}"))
                    else:
                        tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, operand, value=int(operand, 16), size=64))
                else:
                    tokens.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, operand))
                tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ", "))

            # Drop the separator that trails the last operand.
            tokens.pop()

        return tokens, 1

    def get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; nothing is appended and None is returned."""
        return None
Beispiel #6
0
class HigherSubleq64(Architecture):
    """Architecture for the "hsq64" instruction set.

    Instructions are not decoded from raw bytes here. They are taken from
    ``disassembler.instrs``, indexed by ``addr // address_size``, and operands
    are scaled by ``address_size`` to turn them into byte addresses.
    """
    name = "hsq64"
    address_size = 8
    default_int_size = 8
    instr_alignment = 1
    max_instr_length = address_size * 32
    # Must be assigned before any of the methods below are called.
    disassembler: HsqDisassembler = None

    regs = {
        "sp": RegisterInfo("sp", 8),
        "bp": RegisterInfo("bp", 8),
        "ax": RegisterInfo("ax", 8),
    }
    stack_pointer = "sp"

    def get_instruction_info(self, data, addr):
        """Return length and branch information for the instruction at ``addr``.

        Returns ``None`` when the disassembler has no instruction at that slot.
        """
        instr = self.disassembler.instrs[addr // self.address_size]
        if instr is None:
            return None

        result = InstructionInfo()
        result.length = instr.width * self.address_size
        next_addr = instr.get_next_addr()
        if isinstance(instr, Call):
            result.add_branch(BranchType.CallDestination,
                              instr.c * self.address_size)
        elif isinstance(instr, (Ret, Exit)):
            result.add_branch(BranchType.FunctionReturn)
        elif len(next_addr) == 2:
            # Two successors: index 1 is the taken target, index 0 the other.
            result.add_branch(BranchType.TrueBranch,
                              next_addr[1] * self.address_size)
            result.add_branch(BranchType.FalseBranch,
                              next_addr[0] * self.address_size)
        elif len(next_addr) == 1:
            result.add_branch(BranchType.UnconditionalBranch,
                              next_addr[0] * self.address_size)
        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at ``addr``.

        The mnemonic is the lower-cased class name of the instruction object.
        Register operands are shown by symbol, all others as hex byte
        addresses. Returns ``None`` when there is no instruction at ``addr``.
        """
        instr = self.disassembler.instrs[addr // self.address_size]
        if instr is None:
            return None

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 type(instr).__name__.lower()),
            InstructionTextToken(InstructionTextTokenType.TextToken, " "),
        ]
        last = len(instr.operands) - 1
        for i, operand in enumerate(instr.operands):
            if self.disassembler.is_register(operand):
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.RegisterToken,
                        self.disassembler.symbol[operand],
                    ))
            else:
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.PossibleAddressToken,
                        hex(operand * self.address_size),
                    ))

            if i != last:
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.OperandSeparatorToken, ", "))

        return tokens, instr.width * self.address_size

    def get_instruction_low_level_il(self, data, addr, il: LowLevelILFunction):
        """Lift the instruction at ``addr`` into ``il``.

        Returns the instruction length in bytes, or ``None`` when there is no
        instruction at ``addr``. Instruction kinds without a branch below
        append nothing but still report their length.
        """
        instr = self.disassembler.instrs[addr // self.address_size]
        if instr is None:
            return None

        size = self.address_size
        length = instr.width * size

        if isinstance(instr, Subleq):
            a, b, c = instr.a, instr.b, instr.c

            # b = b - a, then branch to c when the result is <= 0.
            _, mem_a = self.get_addr_mem_il(a, il)
            addr_b, mem_b = self.get_addr_mem_il(b, il)
            il.append(
                self._write_operand_il(b, addr_b, il.sub(size, mem_b, mem_a),
                                       il))
            less_op = il.compare_signed_less_equal(size, mem_b,
                                                   il.const(size, 0))

            t_target = il.get_label_for_address(il.arch, c * size)
            t_label_found = t_target is not None
            if not t_label_found:
                t_target = LowLevelILLabel()

            # The fall-through is the next instruction, i.e. addr plus this
            # instruction's byte length (width * address_size).
            f_target = il.get_label_for_address(il.arch, addr + length)
            f_label_found = f_target is not None
            if not f_label_found:
                f_target = LowLevelILLabel()

            il.append(il.if_expr(less_op, t_target, f_target))

            if not t_label_found:
                # No existing label for the target: emit an explicit jump.
                il.mark_label(t_target)
                il.append(il.jump(il.const(size, c * size)))
            if not f_label_found:
                il.mark_label(f_target)
        elif isinstance(instr, Clear):
            b = instr.b

            # Zero b (register or memory cell), then jump to c.
            addr_b, _ = self.get_addr_mem_il(b, il)
            il.append(self._write_operand_il(b, addr_b, il.const(size, 0), il))
            il.append(il.jump(il.const(size, instr.c * size)))
        elif isinstance(instr, Push):
            _, mem_v = self.get_addr_mem_il(instr.v, il)
            il.append(il.push(size, mem_v))
        elif isinstance(instr, Mov):
            a, b = instr.a, instr.b

            _, mem_a = self.get_addr_mem_il(a, il)
            addr_b, _ = self.get_addr_mem_il(b, il)
            il.append(self._write_operand_il(b, addr_b, mem_a, il))
        elif isinstance(instr, Ret):
            il.append(il.ret(il.load(size, il.reg(size, "sp"))))
        elif isinstance(instr, Pop):
            v = instr.v
            addr_v, _ = self.get_addr_mem_il(v, il)
            il.append(self._write_operand_il(v, addr_v, il.pop(size), il))
        elif isinstance(instr, Call):
            il.append(il.call(il.const(size, instr.c * size)))
        elif isinstance(instr, Inc):
            il.append(self._add_const_il(instr.b, 1, il))
        elif isinstance(instr, Dec):
            il.append(self._add_const_il(instr.b, -1, il))
        elif isinstance(instr, Exit):
            il.append(il.no_ret())
        elif isinstance(instr, Jmp):
            il.append(il.jump(il.const(size, instr.c * size)))

        return length

    def _write_operand_il(self, operand, addr_il, value_il, il):
        """Build the IL that writes ``value_il`` to ``operand``.

        Register operands become ``set_reg``; everything else becomes a memory
        store through ``addr_il``.
        """
        if self.disassembler.is_register(operand):
            return il.set_reg(self.address_size,
                              self.disassembler.symbol[operand], value_il)
        return il.store(self.address_size, addr_il, value_il)

    def _add_const_il(self, operand, delta, il):
        """Build the IL for ``operand = operand + delta``."""
        addr_il, mem_il = self.get_addr_mem_il(operand, il)
        total = il.add(self.address_size, mem_il,
                       il.const(self.address_size, delta))
        return self._write_operand_il(operand, addr_il, total, il)

    def get_addr_mem_il(self, addr, il):
        """Return ``(address_il, value_il)`` for an operand.

        For a register both entries are the same register read. Otherwise the
        first is a constant pointer to ``addr * address_size`` and the second
        is a load through that pointer.
        """
        if self.disassembler.is_register(addr):
            addr_il = il.reg(self.address_size, self.disassembler.symbol[addr])
            mem_il = addr_il
        else:
            addr_il = il.const_pointer(self.address_size,
                                       addr * self.address_size)
            mem_il = il.load(self.address_size, addr_il)
        return addr_il, mem_il
Beispiel #7
0
class MCS51(Architecture):
    """
    8051 (MCS-51) architecture driven by per-opcode lookup tables (``lut``).

    Capitalization convention: memory-mapped stuff in allcaps, bits and true
    registers lower? Except r0-r7, also lower? Foolish consistency.
    """
    name = "8051"

    # C 'pointers' tend to be 3 bytes, but architecture-wise it's just 2?
    # Our fake address space that keeps all flash banks mapped needs 3.
    # Full XRAM/IRAM tags need 5.
    address_size = 2  # sets default return value size, nothing else... ???

    endianness = Endianness.BigEndian  # up to compiler... needs to be chosen

    default_int_size = 1
    max_instr_length = 3
    stack_pointer = 'SP'

    regs = {r: RegisterInfo(r, 1)
            for r in [
                'SP',
                'A',
                'B',
            ]}
    regs['DPTR'] = RegisterInfo('DPTR', 2)
    regs['DPL'] = RegisterInfo('DPTR', 1)
    regs['DPH'] = RegisterInfo('DPTR', 1, 1)  # FIXME what endianness is this?

    if 0:
        regs.update(
            {r: RegisterInfo(r, 1)
             for r in ['R%d' % n for n in range(8)]})
    else:
        # This is cute, but I'm not yet sure if it's useful. Register merging
        # doesn't come in until HLIL?
        #
        # On closer look, this might be the only way to make calling
        # conventions work. At least as they are now.
        # Need to re-visit once this subregister bug is fixed:
        # https://github.com/Vector35/binaryninja-api/issues/715
        regs['PTR'] = RegisterInfo('Y0', 3,
                                   1)  # C pointers under some compilers

        regs['Y0'] = RegisterInfo('Y0', 4)
        regs['Y4'] = RegisterInfo('Y4', 4)

        regs['T0'] = RegisterInfo('Y0', 2)
        regs['T2'] = RegisterInfo('Y0', 2, 2)
        regs['T4'] = RegisterInfo('Y4', 2)
        regs['T6'] = RegisterInfo('Y4', 2, 2)

        regs['R0'] = RegisterInfo('Y0', 1)
        regs['R1'] = RegisterInfo('Y0', 1, 1)
        regs['R2'] = RegisterInfo('Y0', 1, 2)
        regs['R3'] = RegisterInfo('Y0', 1, 3)
        regs['R4'] = RegisterInfo('Y4', 1)
        regs['R5'] = RegisterInfo('Y4', 1, 1)
        regs['R6'] = RegisterInfo('Y4', 1, 2)
        regs['R7'] = RegisterInfo('Y4', 1, 3)

    flags = [
        # actual flags stored in PSW special function register:
        'p',  # parity of accumulator
        #'ud', # user defined/unused by base hardware
        'ov',  # signed overflow on add
        #'rs0', 'rs1', # R0-R7 register bank select
        #'f0', # software use, like ud
        'ac',  # aux carry, because BCD is *important*!
        'c',

        # synthesized flags:
        'z',  # "There is no zero bit in the PSW. The JZ and JNZ instructions
        's',  #  test the Accumulator data for that condition."
    ]
    flag_write_types = [
        '',  # first element *might* be ignored due to known bug
        'c',
        'zsp',  # modify A, without touching other flags
        'zspc',  # modify A and carry flag
        'zspc ov',  # */ operations
        #'zspc ov ac', # +- operations
        '*',  # +- operations
        # should mov indirect into PSW/ACC have its own flag settings?
    ]
    flags_written_by_flag_write_type = {
        'c': ['c'],
        'zsp': ['z', 's', 'p'],
        'zspc': ['z', 's', 'p', 'c'],
        #'zspc ov': ['z','s','p','c','ov'],
        '*': ['z', 's', 'p', 'c', 'ov', 'ac'],
    }
    flag_roles = {
        # real:
        'c': FlagRole.CarryFlagRole,
        'ac': FlagRole.HalfCarryFlagRole,
        'ov': FlagRole.OverflowFlagRole,
        'p': FlagRole.OddParityFlagRole,
        # imaginary:
        's': FlagRole.NegativeSignFlagRole,
        'z': FlagRole.ZeroFlagRole,
    }
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_E: ["z"],
        LowLevelILFlagCondition.LLFC_NE: ["z"],
        LowLevelILFlagCondition.LLFC_NEG: ["s"],
        LowLevelILFlagCondition.LLFC_POS: ["s"],
        LowLevelILFlagCondition.LLFC_UGE: ["c"],
        LowLevelILFlagCondition.LLFC_ULT: ["c"],
        # not set by nes.py, going to try setting:
        LowLevelILFlagCondition.LLFC_O: ["ov"],
        LowLevelILFlagCondition.LLFC_NO: ["ov"],
    }

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch info for the instruction starting ``data``.

        Returns ``None`` for empty ``data``. A ``TrueBranch`` entry in the
        branch table automatically gets a matching ``FalseBranch`` to the
        following instruction.
        """
        if not len(data):
            return  # edge case during linear sweep
        nfo = InstructionInfo()
        # ana
        size, branch = self.lut.branches[ord(data[0])]
        nfo.length = size
        # emu
        if branch:
            branch_type, target = branch
            if callable(target):
                # Computed targets need the whole instruction; fall back to 0
                # when the buffer is truncated.
                target = target(data, addr, size) if size <= len(data) else 0
            # TODO (CallDestination): keep track of return-effect functions,
            # tweak target +=dx
            # TODO: arch is probably global; need to store this in bv somehow :|
            nfo.add_branch(branch_type, target=target)
            if branch_type == BranchType.TrueBranch:
                nfo.add_branch(BranchType.FalseBranch, addr + size)
        return nfo

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, size)`` for the instruction starting ``data``.

        Returns ``None`` when ``data`` is empty or shorter than the decoded
        instruction, instead of failing an assertion or indexing past the end
        of the buffer.
        """
        if not len(data):
            return None
        # ana
        opcode = ord(data[0])
        size, decoders = self.lut.decoders[opcode]
        if len(data) < size:
            # incomplete code due to disassembling data or missing memory
            return None
        vals = [decoder(data, addr, size) for decoder in decoders]
        # out / outop
        toks = self.lut.text[opcode]
        return out.render(toks, vals), size

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction starting ``data`` into ``il``.

        Returns the instruction size, or the builder's own size override when
        it returns one. Returns ``None`` for empty ``data``. A truncated
        instruction is not lifted but its size is still returned.
        """
        if not len(data):
            return None
        # ana
        code = ord(data[0])
        size, decoders = self.lut.decoders[code]
        if len(data) < size:
            # incomplete code due to disassembling data or missing memory
            return size  # abort further analysis before it errors
        vals = [decoder(data, addr, size) for decoder in decoders]
        # sem
        build = llil_mangler.patch_at(self, addr) or self.lut.llil[code]
        size_override = build(il, vals, addr)
        return size_override if size_override is not None else size

    #def perform_get_flag_condition_low_level_il(self, cond, il):
    #    il.append(il.unimplemented())
    def perform_get_flag_write_low_level_il(self, op, size, write_type, flag,
                                            operands, il):
        """Lift a flag write by delegating to the base ``Architecture``.

        Earlier experiments with custom carry handling (for RLC, RRC and SBB)
        were permanently disabled and never reachable, so only the delegation
        remains.
        """
        # This can't be right; why doesn't it work on its own?
        fun = Architecture.perform_get_flag_write_low_level_il
        return fun(self, op, size, write_type, flag, operands, il)

    @specification.lazy_memoized_property
    def lut(self):
        """Look up tables generated once.

        All available architectures are *instantiated* on start, even if never
        used. To be a good neighbour but still get to write fun code, complex
        processing should be deferred until needed using this decorator.
        """

        luts = Tables()
        if 1:  # DEBUG
            urls = [
                ('spu plugin',
                 'https://github.com/bambu/binaryninja-spu/blob/master/spu.py'
                 ),
                ('nes plugin',
                 'https://github.com/Vector35/binaryninja-api/blob/dev/python/examples/nes.py'
                 ),
                ('m68k plugin',
                 'https://github.com/alexforencich/binaryninja-m68k/blob/master/__init__.py'
                 ),
            ]
            links = ''.join(
                '- [{0}]({1})\n'.format(title, url) for title, url in urls)
            md = ('## Still Unlifted\n\n' + luts.unlifted +
                  '\n\n## Reference Examples\n\n' + links)
            binaryninja.show_markdown_report("Architecture Progress", md)
        return luts

    def perform_get_associated_arch_by_address(self, addr):
        """Return this architecture and ``addr`` unchanged."""
        # Waaait a second. add_branch has an optional 'arch' argument
        #
        # Can I branch from x86 into BPF? Or .NET IL? Or obfs. interpreter
        # uops? In one idb?
        # OMG IF YES TEST TEST TEST THIS omg, there's even a hinter
        #
        # guess this is from arm thumb shenanigans? or 32/64 in general?
        return self, addr

    ##
    ## That from-IDA patching thing them game hackers are so keen on...
    ##

    def perform_always_branch(self, data, addr):
        """Not implemented; always returns ``None``."""
        return  # TODO do this, even if that's not how you normally patch

    def perform_convert_to_nop(self, data, addr):
        """Not implemented; always returns ``None``."""
        return

    def perform_assemble(self, code, addr):
        """Not implemented; always returns ``None``."""
        # TODO either hand-assemble, or find some nice embeddable asm /w
        # macros and proper labels and stuff? will need to double-check syntax
        # compat
        # also TODO: sdcc 8051 training binary
        return
Beispiel #8
0
class Z80(Architecture):
    """Binary Ninja architecture definition for the Z80.

    The register file and flag model are declared as class attributes. The
    per-instruction callbacks (info, text, LLIL) are thin layers over the
    ``decode`` function and the ``Z80IL`` lifter helpers, both of which are
    defined outside this class.
    """

    name = 'Z80'

    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 4

    # register related stuff
    regs = {
        # main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        # alternate registers
        "AF'": RegisterInfo("AF'", 2),
        "BC'": RegisterInfo("BC'", 2),
        "DE'": RegisterInfo("DE'", 2),
        "HL'": RegisterInfo("HL'", 2),

        # main registers (sub): 1-byte views at offset 1 / 0 of the pair
        "A": RegisterInfo("AF", 1, 1),
        "F": RegisterInfo("AF", 1, 0),
        "B": RegisterInfo("BC", 1, 1),
        "C": RegisterInfo("BC", 1, 0),
        "D": RegisterInfo("DE", 1, 1),
        "E": RegisterInfo("DE", 1, 0),
        "H": RegisterInfo("HL", 1, 1),
        "L": RegisterInfo("HL", 1, 0),
        "Flags": RegisterInfo("AF", 0),

        # alternate registers (sub)
        "A'": RegisterInfo("AF'", 1, 1),
        "F'": RegisterInfo("AF'", 1, 0),
        "B'": RegisterInfo("BC'", 1, 1),
        "C'": RegisterInfo("BC'", 1, 0),
        "D'": RegisterInfo("DE'", 1, 1),
        "E'": RegisterInfo("DE'", 1, 0),
        "H'": RegisterInfo("HL'", 1, 1),
        "L'": RegisterInfo("HL'", 1, 0),
        "Flags'": RegisterInfo("AF'", 0),

        # index registers
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'SP': RegisterInfo('SP', 2),

        # other registers
        'I': RegisterInfo('I', 1),
        'R': RegisterInfo('R', 1),

        # program counter
        'PC': RegisterInfo('PC', 2),

        # status
        'status': RegisterInfo('status', 1),
    }

    stack_pointer = "SP"

    #--------------------------------------------------------------------------
    # FLAG fun
    #--------------------------------------------------------------------------

    flags = ['s', 'z', 'h', 'pv', 'n', 'c']

    # remember, class None is default/integer
    semantic_flag_classes = ['class_bitstuff']

    # flag write types and their mappings
    # NOTE(review): 'cszpv' is declared as a write type but has no entry in
    # flags_written_by_flag_write_type below -- confirm this is intentional.
    flag_write_types = ['dummy', '*', 'c', 'z', 'cszpv', 'not_c']
    flags_written_by_flag_write_type = {
        'dummy': [],
        '*': ['s', 'z', 'h', 'pv', 'n', 'c'],
        'c': ['c'],
        'z': ['z'],
        'not_c': ['s', 'z', 'h', 'pv', 'n'],  # eg: z80's DEC
    }

    # by default, every write type is semantic class None (integer), so no
    # explicit mapping is needed here
    semantic_class_for_flag_write_type = {}

    # groups and their mappings
    semantic_flag_groups = ['group_e', 'group_ne', 'group_lt']
    flags_required_for_semantic_flag_group = {
        'group_lt': ['c'],
        'group_e': ['z'],
        'group_ne': ['z'],
    }
    # no per-group flag conditions are registered yet
    flag_conditions_for_semantic_flag_group = {}

    # roles
    flag_roles = {
        's': FlagRole.NegativeSignFlagRole,
        'z': FlagRole.ZeroFlagRole,
        'h': FlagRole.HalfCarryFlagRole,
        # actually overflow or parity: TODO: implement later
        'pv': FlagRole.OverflowFlagRole,
        # set if last instruction was a subtraction (incl. CP)
        'n': FlagRole.SpecialFlagRole,
        'c': FlagRole.CarryFlagRole,
    }

    # MAP (condition x class) -> flags
    def get_flags_required_for_flag_condition(self, cond, sem_class):
        """Return the flag names needed to evaluate ``cond``.

        Only the default semantic class (``None``) is mapped; any other class,
        and any condition without an entry, yields an empty list.

        :param cond: a ``LowLevelILFlagCondition`` value
        :param sem_class: semantic flag class, ``None`` for the default
        :return: list of flag names
        """
        if sem_class is not None:
            return []

        # s> s>= s< s<= are done by sub and overflow test, so the signed
        # conditions (and half-carry / parity) have no entry here.
        lookup = {
            # Z, zero flag for == and !=
            LowLevelILFlagCondition.LLFC_E: ['z'],
            LowLevelILFlagCondition.LLFC_NE: ['z'],
            # S, sign flag
            LowLevelILFlagCondition.LLFC_NEG: ['s'],
            # C, carry for unsigned comparisons
            LowLevelILFlagCondition.LLFC_UGE: ['c'],
            LowLevelILFlagCondition.LLFC_ULT: ['c'],
        }
        return lookup.get(cond, [])

    #--------------------------------------------------------------------------
    # CFG building
    #--------------------------------------------------------------------------

    def get_instruction_info(self, data, addr):
        """Decode one instruction and describe its length and branches.

        :param data: raw bytes starting at the instruction
        :param addr: address of the instruction
        :return: an ``InstructionInfo``, or ``None`` when decoding reports an
            error or a zero length
        :raises Exception: when a JP/JR/CALL has an operand shape that is not
            handled here
        """
        decoded = decode(data, addr)

        # on error, return nothing
        if decoded.status == DECODE_STATUS.ERROR or decoded.len == 0:
            return None

        # on non-branching, return length
        result = InstructionInfo()
        result.length = decoded.len
        if decoded.typ != INSTRTYPE.JUMP_CALL_RETURN:
            return result

        fallthrough = addr + decoded.len
        op = decoded.op

        # jp has several variations
        if op == OP.JP:
            (oper_type, oper_val) = decoded.operands[0]

            # jp pe,0xDEAD
            if oper_type == OPER_TYPE.COND:
                assert decoded.operands[1][0] == OPER_TYPE.ADDR
                result.add_branch(BranchType.TrueBranch,
                                  decoded.operands[1][1])
                result.add_branch(BranchType.FalseBranch, fallthrough)
            # jp (hl); jp (ix); jp (iy)
            elif oper_type in [
                    OPER_TYPE.REG_DEREF, OPER_TYPE.MEM_DISPL_IX,
                    OPER_TYPE.MEM_DISPL_IY
            ]:
                result.add_branch(BranchType.IndirectBranch)
            # jp 0xDEAD
            elif oper_type == OPER_TYPE.ADDR:
                result.add_branch(BranchType.UnconditionalBranch, oper_val)
            else:
                raise Exception('handling JP')

        # jr can be conditional
        elif op == OP.JR:
            (oper_type, oper_val) = decoded.operands[0]

            # jr c,0xdf07
            if oper_type == OPER_TYPE.COND:
                assert decoded.operands[1][0] == OPER_TYPE.ADDR
                result.add_branch(BranchType.TrueBranch,
                                  decoded.operands[1][1])
                result.add_branch(BranchType.FalseBranch, fallthrough)
            # jr 0xdf07
            elif oper_type == OPER_TYPE.ADDR:
                result.add_branch(BranchType.UnconditionalBranch, oper_val)
            else:
                raise Exception('handling JR')

        # djnz is implicitly conditional
        elif op == OP.DJNZ:
            (oper_type, oper_val) = decoded.operands[0]
            assert oper_type == OPER_TYPE.ADDR
            result.add_branch(BranchType.TrueBranch, oper_val)
            result.add_branch(BranchType.FalseBranch, fallthrough)

        # call can be conditional
        elif op == OP.CALL:
            (oper_type, oper_val) = decoded.operands[0]
            # call c,0xdf07
            if oper_type == OPER_TYPE.COND:
                assert decoded.operands[1][0] == OPER_TYPE.ADDR
                result.add_branch(BranchType.CallDestination,
                                  decoded.operands[1][1])
            # call 0xdf07
            elif oper_type == OPER_TYPE.ADDR:
                result.add_branch(BranchType.CallDestination, oper_val)
            else:
                raise Exception('handling CALL')

        # ret can be conditional; conditional returns don't end the block
        elif op == OP.RET:
            is_conditional = (decoded.operands
                              and decoded.operands[0][0] == OPER_TYPE.COND)
            if not is_conditional:
                result.add_branch(BranchType.FunctionReturn)

        # ret from interrupts
        elif op == OP.RETI or op == OP.RETN:
            result.add_branch(BranchType.FunctionReturn)

        return result

    #--------------------------------------------------------------------------
    # STRING building, disassembly
    #--------------------------------------------------------------------------

    def reg2str(self, r):
        """Return the display name of register ``r``.

        A trailing underscore in ``r.name`` is rendered as an apostrophe, so
        e.g. ``HL_`` becomes ``HL'``.
        """
        reg_name = r.name
        if reg_name[-1] == '_':
            return reg_name[:-1] + "'"
        return reg_name

    # Token kinds used below (from api/python/function.py):
    #
    #   TextToken                  Text that doesn't fit into the other tokens
    #   InstructionToken           The instruction mnemonic
    #   OperandSeparatorToken      The comma or whatever else separates tokens
    #   RegisterToken              Registers
    #   IntegerToken               Integers
    #   PossibleAddressToken       Integers that are likely addresses
    #   BeginMemoryOperandToken    The start of memory operand
    #   EndMemoryOperandToken      The end of a memory operand
    #   FloatingPointToken         Floating point number

    @staticmethod
    def _memory_operand(inner):
        """Wrap the token list ``inner`` in '(' ... ')' memory-operand tokens."""
        opening = InstructionTextToken(
            InstructionTextTokenType.BeginMemoryOperandToken, '(')
        closing = InstructionTextToken(
            InstructionTextTokenType.EndMemoryOperandToken, ')')
        return [opening] + inner + [closing]

    @staticmethod
    def _displacement_tokens(disp):
        """Return the tokens for the displacement of an (IX+d)/(IY+d) operand.

        Zero is omitted; magnitudes of 16 and above are shown in hex with an
        explicit sign token; smaller values are shown in decimal.
        """
        if disp == 0:
            # omit displacement of 0
            return []

        if disp >= 16:
            # (iy+0x28)
            sign_txt, value_txt = '+', '0x%X' % disp
        elif disp > 0:
            sign_txt, value_txt = '+', '%d' % disp
        elif disp <= -16:
            # adc a,(ix-0x55)
            sign_txt, value_txt = '-', '0x%X' % (-disp)
        else:
            # small negatives: '%d' already carries the minus sign
            sign_txt, value_txt = None, '%d' % disp

        tokens = []
        if sign_txt is not None:
            tokens.append(InstructionTextToken(
                InstructionTextTokenType.TextToken, sign_txt))
        tokens.append(InstructionTextToken(
            InstructionTextTokenType.IntegerToken, value_txt, disp))
        return tokens

    def _operand_tokens(self, oper_type, oper_val):
        """Return the list of text tokens for a single decoded operand.

        :raises Exception: for an operand type that is not handled
        """
        if oper_type == OPER_TYPE.REG:
            return [InstructionTextToken(
                InstructionTextTokenType.RegisterToken,
                self.reg2str(oper_val))]

        if oper_type in [OPER_TYPE.REG_DEREF, OPER_TYPE.REG_C_DEREF,
                         OPER_TYPE.REG_BC_DEREF, OPER_TYPE.REG_DE_DEREF,
                         OPER_TYPE.REG_HL_DEREF, OPER_TYPE.REG_SP_DEREF]:
            return self._memory_operand([InstructionTextToken(
                InstructionTextTokenType.RegisterToken,
                self.reg2str(oper_val))])

        if oper_type == OPER_TYPE.ADDR:
            # negative targets are shown wrapped into the 16-bit address space
            if oper_val < 0:
                oper_val = oper_val & 0xFFFF
            return [InstructionTextToken(
                InstructionTextTokenType.PossibleAddressToken,
                '0x%04x' % oper_val, oper_val)]

        if oper_type == OPER_TYPE.ADDR_DEREF:
            return self._memory_operand([InstructionTextToken(
                InstructionTextTokenType.PossibleAddressToken,
                '0x%04x' % oper_val, oper_val)])

        if oper_type in [OPER_TYPE.MEM_DISPL_IX, OPER_TYPE.MEM_DISPL_IY]:
            index_reg = 'IX' if oper_type == OPER_TYPE.MEM_DISPL_IX else 'IY'
            inner = [InstructionTextToken(
                InstructionTextTokenType.RegisterToken, index_reg)]
            inner.extend(self._displacement_tokens(oper_val))
            return self._memory_operand(inner)

        if oper_type == OPER_TYPE.IMM:
            # hex from 16 upwards, decimal below (0 renders as '0')
            txt = '0x%x' % oper_val if oper_val >= 16 else '%d' % oper_val
            return [InstructionTextToken(
                InstructionTextTokenType.IntegerToken, txt, oper_val)]

        if oper_type == OPER_TYPE.COND:
            return [InstructionTextToken(
                InstructionTextTokenType.TextToken, CC_TO_STR[oper_val])]

        raise Exception('unknown operand type: ' + str(oper_type))

    def get_instruction_text(self, data, addr):
        """Disassemble one instruction into display tokens.

        :param data: raw bytes starting at the instruction
        :param addr: address of the instruction
        :return: ``(tokens, length)``, or ``None`` when decoding does not
            report OK or reports a zero length
        :raises Exception: for an operand type that is not handled
        """
        decoded = decode(data, addr)
        if decoded.status != DECODE_STATUS.OK or decoded.len == 0:
            return None

        # opcode
        result = [InstructionTextToken(
            InstructionTextTokenType.InstructionToken, decoded.op.name)]

        # space for operand
        if decoded.operands:
            result.append(
                InstructionTextToken(InstructionTextTokenType.TextToken, ' '))

        # operands, comma-separated
        last_index = len(decoded.operands) - 1
        for i, (oper_type, oper_val) in enumerate(decoded.operands):
            result.extend(self._operand_tokens(oper_type, oper_val))
            if i < last_index:
                result.append(InstructionTextToken(
                    InstructionTextTokenType.OperandSeparatorToken, ','))

        # undocumented instructions that also load a register: show them as
        # "ld <reg>,<instruction ...>"
        if decoded.metaLoad:
            (oper_type, oper_val) = decoded.metaLoad
            assert oper_type == OPER_TYPE.REG
            prefix = [
                InstructionTextToken(
                    InstructionTextTokenType.InstructionToken, 'ld'),
                InstructionTextToken(
                    InstructionTextTokenType.TextToken, ' '),
                InstructionTextToken(
                    InstructionTextTokenType.RegisterToken,
                    self.reg2str(oper_val)),
                InstructionTextToken(
                    InstructionTextTokenType.OperandSeparatorToken, ','),
            ]
            result = prefix + result

        return result, decoded.len

    #--------------------------------------------------------------------------
    # LIFTING
    #--------------------------------------------------------------------------

    def get_flag_write_low_level_il(self, op, size, write_type, flag, operands,
                                    il):
        """Return the IL expression that computes ``flag`` for ``op``.

        ``Z80IL.gen_flag_il`` is consulted first; when it returns a falsy
        value the base ``Architecture`` implementation is used instead.
        """
        flag_il = Z80IL.gen_flag_il(op, size, write_type, flag, operands, il)
        if flag_il:
            return flag_il

        return Architecture.get_flag_write_low_level_il(
            self, op, size, write_type, flag, operands, il)

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift one instruction into ``il`` via ``Z80IL.gen_instr_il``.

        :return: the instruction length, or ``None`` when decoding does not
            report OK or reports a zero length
        """
        decoded = decode(data, addr)
        if decoded.status != DECODE_STATUS.OK or decoded.len == 0:
            return None

        Z80IL.gen_instr_il(addr, decoded, il)

        return decoded.len
Beispiel #9
0
class Smali(Architecture):  # type: ignore
    """Architecture class for disassembling Dalvik bytecode into Smali

    Initializing the class calls android.smali.load_insns(), which imports
    cached instruction information from "android/instruction_data.pickle".

    The three mandatory Architecture functions are implemented:
        - get_instruction_info
        - get_instruction_text
        - get_instruction_low_level_il

    There is also load_dex(), which is called the first time any of the three
    functions are called. It grabs the reference to DexFile.
    """

    name = "Smali"

    # FIXME there should be 65536 registers, but binja hangs when the number gets above a thousand or so
    regs = dict(
        {f"v{i}": RegisterInfo(f"v{i}", 4) for i in range(256)},
        pc=RegisterInfo("pc", 4),
        fp=RegisterInfo("fp", 4),
        sp=RegisterInfo("sp", 4),
    )
    stack_pointer = "sp"
    max_instr_length = 200
    instr_alignment = 2

    def __init__(self) -> None:
        self.insns = load_insns()
        self.inialized_df: bool = False
        super().__init__()

    def load_dex(self) -> None:
        """Fetch the DexFile reference. Should only be called once."""
        # FIXME all tabs in a window share the same Architecture class,
        # apparently. As far as I know there's no way to store this
        # information per-tab. This could be hacked around if there was a way
        # to determine what binary is opened, but I don't see one either.
        #
        # Edit: the settings API seems to provide a way to do this, but the
        # 'Context' instance doesn't seem to work
        # (https://api.binary.ninja/binaryninja.settings-module.html):
        # registering a group and then a setting inside it on
        # Settings('Context') fails with
        # "Setting group: newgrp does not exist!".
        self.df: DexFile = Architecture['Smali'].df
        self.inialized_df = True

    @staticmethod
    def _parse_operands(data: bytes, insn_info):
        """Parse the operand fields of the instruction at the start of ``data``.

        Only the instruction's own code units (``insn_len`` 16-bit units) are
        handed to the format parser, after swapping each short's byte order.
        """
        raw = endian_swap_shorts(data[: 2 * insn_info.fmt.insn_len])
        return parse_with_format(raw, insn_info.fmt.format_)

    @staticmethod
    def _signed_field(args, insn_info, field: str):
        """Return operand ``field`` passed through ``sign``.

        The second argument given to ``sign`` is the number of times the
        field letter occurs in the instruction's format string.
        """
        return sign(args[field], insn_info.fmt.format_.count(field))

    def get_instruction_info(self, data: bytes, addr: FileOffset) -> InstructionInfo:
        """Report the length and outgoing branches of the instruction at ``addr``."""
        if not self.inialized_df:
            self.load_dex()
        ii = InstructionInfo()

        # Handle pseudoinstructions: opcode byte 0 followed by a non-zero byte
        if data[0] == 0 and data[1] != 0:
            if data[1] > 3:
                ii.length = 2
                return ii
            ii.length = min(
                self.max_instr_length, self.df.pseudoinstructions[addr]._total_size
            )
            ii.add_branch(BranchType.FunctionReturn)
            return ii

        # Handle normal instructions (insn_len counts 2-byte code units)
        insn_info = self.insns[data[0]]
        insn_size = insn_info.fmt.insn_len * 2
        ii.length = insn_size
        mnemonic = insn_info.mnemonic

        if mnemonic.startswith("return"):
            ii.add_branch(BranchType.FunctionReturn)
        elif mnemonic == "throw":
            ii.add_branch(BranchType.ExceptionBranch)
            # TODO
        elif mnemonic.startswith("goto"):
            args = self._parse_operands(data, insn_info)
            offset = self._signed_field(args, insn_info, "A")
            ii.add_branch(BranchType.UnconditionalBranch, target=addr + offset * 2)
        elif mnemonic in ("packed-switch", "sparse-switch"):
            # Adding more than 2 branches causes binja to segfault, so this has
            # to be handled in LLIL instead.
            ii.add_branch(BranchType.UnresolvedBranch)
        elif mnemonic == "fill-array-data":
            args = self._parse_operands(data, insn_info)
            offset = self._signed_field(args, insn_info, "B")
            ii.add_branch(BranchType.TrueBranch, target=addr + offset * 2)
            ii.add_branch(BranchType.FalseBranch, target=addr + insn_size)
        elif mnemonic.startswith("if-"):
            args = self._parse_operands(data, insn_info)
            # the branch offset is field C when the format has one, else B
            var = "C" if "C" in args else "B"
            offset = self._signed_field(args, insn_info, var)
            ii.add_branch(BranchType.TrueBranch, target=addr + offset * 2)
            ii.add_branch(BranchType.FalseBranch, target=addr + insn_size)
        elif mnemonic.startswith("invoke-"):
            if mnemonic.startswith("invoke-custom"):
                log_warn("Resolution of invoke-custom is not implemented")
                ii.add_branch(BranchType.UnresolvedBranch)
            else:
                args = self._parse_operands(data, insn_info)
                meth = self.df.method_ids[args["B"]]
                # no call edge is added when the method has no code offset
                if meth._insns_off is not None:
                    ii.add_branch(BranchType.CallDestination, target=meth._insns_off)
        return ii

    def get_instruction_text(
        self, data: bytes, addr: FileOffset
    ) -> Tuple[List[InstructionTextToken], int]:
        """Disassemble the instruction at ``addr`` via ``disassemble``."""
        if not self.inialized_df:
            self.load_dex()
        return disassemble(self.df, data, addr)

    def get_instruction_low_level_il(
        self, data: bytes, addr: FileOffset, il: LowLevelILFunction
    ) -> int:
        """Prepare switch-target labels and return the instruction length.

        No IL is emitted yet. For packed-switch (0x2B) a label is ensured for
        every target of the payload. sparse-switch (0x2C) is not handled: the
        original condition ``data[0] == 0x2B or data[0] == 0x2C and False``
        only ever matched 0x2B because ``and`` binds tighter than ``or``.
        """
        if not self.inialized_df:
            self.load_dex()
        insn_info = self.insns[data[0]]
        if data[0] == 0x2B:  # packed-switch
            args = self._parse_operands(data, insn_info)
            offset = self._signed_field(args, insn_info, "B")
            payload = cast(
                SmaliPackedSwitchPayload,
                self.df.pseudoinstructions[cast(FileOffset, addr + offset * 2)],
            )
            # Collected for the jump table below; not consumed yet.
            branches = []
            for target in payload.targets:
                target_addr = addr + target * 2
                label = il.get_label_for_address(self, target_addr)
                if label is None:
                    il.add_label_for_address(self, target_addr)
                    label = il.get_label_for_address(self, target_addr)
                branches.append(label)
            # TODO: emit an LLIL_JUMP_TO on register v{args["A"]} over
            # `branches`, and handle sparse-switch (0x2C) by walking
            # zip(payload.keys, payload.targets).
        return insn_info.fmt.insn_len * 2
Beispiel #10
0
class Smali(Architecture):  # type: ignore
    """Architecture class for disassembling Dalvik bytecode into Smali

    Initializing the class calls android.smali.load_insns(), which imports
    cached instruction information from "android/instruction_data.pickle".

    The three mandatory Architecture functions are implemented:
        - get_instruction_info
        - get_instruction_text
        - get_instruction_low_level_il

    There is also load_dex(), which is called at the beginning of all three
    functions. It grabs the reference to the DexFile in view.
    """

    name = "Smali"

    # FIXME there should be 65536 registers, but binja hangs when the number gets above a thousand or so
    regs = dict(
        {f"v{i}": RegisterInfo(f"v{i}", 4)
         for i in range(256)},
        pc=RegisterInfo("pc", 4),
        fp=RegisterInfo("fp", 4),
        sp=RegisterInfo("sp", 4),
    )
    stack_pointer = "sp"
    max_instr_length = 200
    instr_alignment = 2

    def __init__(self) -> None:
        self.insns = load_insns()
        super().__init__()

    def load_dex(self) -> None:
        """Set self.df to DexFile of focused file."""
        registered = Architecture["Smali"]
        self.df: DexFile = registered.dfs[registered.frame]

    @staticmethod
    def _parse_operands(data: bytes, insn_info):
        """Parse the operand fields of the instruction at the start of ``data``.

        Only the instruction's own code units (``insn_len`` 16-bit units) are
        handed to the format parser, after swapping each short's byte order.
        """
        raw = endian_swap_shorts(data[:2 * insn_info.fmt.insn_len])
        return parse_with_format(raw, insn_info.fmt.format_)

    @staticmethod
    def _signed_field(args, insn_info, field: str):
        """Return operand ``field`` passed through ``sign``.

        The second argument given to ``sign`` is the number of times the
        field letter occurs in the instruction's format string.
        """
        return sign(args[field], insn_info.fmt.format_.count(field))

    def get_instruction_info(self, data: bytes,
                             addr: FileOffset) -> InstructionInfo:
        """Report the length and outgoing branches of the instruction at ``addr``."""
        self.load_dex()
        ii = InstructionInfo()

        # Handle pseudoinstructions: opcode byte 0 followed by a non-zero byte
        if data[0] == 0 and data[1] != 0:
            if data[1] > 3:
                ii.length = 2
                return ii
            ii.length = min(self.max_instr_length,
                            self.df.pseudoinstructions[addr]._total_size)
            ii.add_branch(BranchType.FunctionReturn)
            return ii

        # Handle normal instructions (insn_len counts 2-byte code units)
        insn_info = self.insns[data[0]]
        insn_size = insn_info.fmt.insn_len * 2
        ii.length = insn_size
        mnemonic = insn_info.mnemonic

        if mnemonic.startswith("return"):
            ii.add_branch(BranchType.FunctionReturn)
        elif mnemonic == "throw":
            ii.add_branch(BranchType.ExceptionBranch)
            # TODO
        elif mnemonic.startswith("goto"):
            args = self._parse_operands(data, insn_info)
            offset = self._signed_field(args, insn_info, "A")
            ii.add_branch(BranchType.UnconditionalBranch,
                          target=addr + offset * 2)
        elif mnemonic in ("packed-switch", "sparse-switch"):
            # Adding more than 2 branches causes binja to segfault, so this has
            # to be handled in LLIL instead.
            ii.add_branch(BranchType.UnresolvedBranch)
        elif mnemonic == "fill-array-data":
            args = self._parse_operands(data, insn_info)
            offset = self._signed_field(args, insn_info, "B")
            ii.add_branch(BranchType.TrueBranch, target=addr + offset * 2)
            ii.add_branch(BranchType.FalseBranch, target=addr + insn_size)
        elif mnemonic.startswith("if-"):
            args = self._parse_operands(data, insn_info)
            # the branch offset is field C when the format has one, else B
            var = "C" if "C" in args else "B"
            offset = self._signed_field(args, insn_info, var)
            ii.add_branch(BranchType.TrueBranch, target=addr + offset * 2)
            ii.add_branch(BranchType.FalseBranch, target=addr + insn_size)
        elif mnemonic.startswith("invoke-"):
            if mnemonic.startswith("invoke-custom"):
                log_warn("Resolution of invoke-custom is not implemented")
                ii.add_branch(BranchType.UnresolvedBranch)
            else:
                args = self._parse_operands(data, insn_info)
                meth = self.df.method_ids[args["B"]]
                # no call edge is added when the method has no code offset
                if meth._insns_off is not None:
                    ii.add_branch(BranchType.CallDestination,
                                  target=meth._insns_off)
        return ii

    def get_instruction_text(
            self, data: bytes,
            addr: FileOffset) -> Tuple[List[InstructionTextToken], int]:
        """Disassemble the instruction at ``addr`` via ``disassemble``."""
        self.load_dex()
        return disassemble(self.df, data, addr)

    def get_instruction_low_level_il(self, data: bytes, addr: FileOffset,
                                     il: LowLevelILFunction) -> int:
        """Prepare switch-target labels and return the instruction length.

        No IL is emitted yet. For packed-switch (0x2B) a label is ensured for
        every target of the payload. sparse-switch (0x2C) is not handled: the
        original condition ``data[0] == 0x2B or data[0] == 0x2C and False``
        only ever matched 0x2B because ``and`` binds tighter than ``or``.
        """
        self.load_dex()
        insn_info = self.insns[data[0]]
        if data[0] == 0x2B:  # packed-switch
            args = self._parse_operands(data, insn_info)
            offset = self._signed_field(args, insn_info, "B")
            payload = cast(
                SmaliPackedSwitchPayload,
                self.df.pseudoinstructions[cast(FileOffset,
                                                addr + offset * 2)],
            )
            # Collected for the jump table below; not consumed yet.
            branches = []
            for target in payload.targets:
                target_addr = addr + target * 2
                label = il.get_label_for_address(self, target_addr)
                if label is None:
                    il.add_label_for_address(self, target_addr)
                    label = il.get_label_for_address(self, target_addr)
                branches.append(label)
            # TODO: emit an LLIL_JUMP_TO on register v{args["A"]} over
            # `branches`, and handle sparse-switch (0x2C) by walking
            # zip(payload.keys, payload.targets).
        return insn_info.fmt.insn_len * 2
Beispiel #11
0
class GB(Architecture):
    """Disassembly-only architecture plugin named ``GB``.

    Instructions are decoded through the module-level ``opcodes`` table,
    keyed by ``hex(opcode)``; each entry supplies ``mnemonic``, ``length``,
    ``flags`` and optional ``operand1`` / ``operand2`` fields.  Lifting to
    low-level IL is not implemented.
    """
    name = "GB"
    address_size = 2
    default_int_size = 1
    max_instr_length = 3
    regs = {
        'a': RegisterInfo('a', 1),
        'b': RegisterInfo('b', 1),
        'c': RegisterInfo('c', 1),
        'd': RegisterInfo('d', 1),
        'e': RegisterInfo('e', 1),
        'f': RegisterInfo('f', 1),
        'h': RegisterInfo('h', 1),
        'l': RegisterInfo('l', 1),
        'af': RegisterInfo('af', 2),
        'bc': RegisterInfo('bc', 2),
        'cb': RegisterInfo('cb', 2),
        'de': RegisterInfo('de', 2),
        'hl': RegisterInfo('hl', 2),
        'sp': RegisterInfo('sp', 2),
        'pc': RegisterInfo('pc', 2),
    }
    stack_pointer = 'sp'
    flags = ["z", "n", "h", "c"]
    flag_write_types = ["*", "czn", "zn"]
    flag_roles = {
        'z': FlagRole.ZeroFlagRole,
        'n': FlagRole.NegativeSignFlagRole,
        'h': FlagRole.HalfCarryFlagRole,
        'c': FlagRole.CarryFlagRole,
    }
    flags_written_by_flag_write_type = {
        "*": ["c", "z", "h", "n"],
        "czn": ["c", "z", "n"],
        "zn": ["z", "n"],
    }

    def decode_operand(self, operand):
        """Return *operand* if it names a known register, else ``None``."""
        if operand in self.regs:
            return operand
        return None

    def decode_instruction(self, data, addr):
        """Decode the instruction at the start of *data*.

        Args:
            data: Raw bytes beginning at the instruction.
            addr: Address of the instruction (unused by the decoder).

        Returns:
            ``(mnemonic, length, operands, flags, value)`` where *operands*
            and *flags* are lower-cased lists and *value* is the immediate
            (one byte, or a little-endian 16-bit word) or ``None``.  All
            five elements are ``None`` when the opcode is unknown or *data*
            is shorter than the instruction.
        """
        invalid = (None, None, None, None, None)
        if len(data) < 1:
            return invalid
        try:
            info = opcodes[hex(data[0])]
        except KeyError:
            return invalid

        length = info['length']
        if len(data) < length:
            # Truncated instruction: refuse to decode instead of failing
            # later with IndexError / struct.error on the missing bytes.
            return invalid

        operands = [
            info[key].lower() for key in ('operand1', 'operand2')
            if key in info
        ]
        flags = [f.lower() for f in info['flags']]
        if length == 2:
            value = data[1]
        elif length == 3:
            value = struct.unpack('<H', data[1:3])[0]
        else:
            value = None
        return info['mnemonic'], length, operands, flags, value

    def perform_get_instruction_info(self, data, addr):
        """Describe the length and branches of the instruction at *addr*.

        Returns:
            An ``InstructionInfo`` with the length set and branches added
            for ``JR``, ``JP``, ``RET`` and ``CALL``, or ``None`` when the
            instruction cannot be decoded.
        """
        instr, length, _operands, _flags, _value = self.decode_instruction(
            data, addr)
        if instr is None:
            return None
        result = InstructionInfo()
        result.length = length
        opcode = data[0]
        if instr == 'JR':
            # Signed 8-bit displacement relative to the next instruction.
            arg = data[1]
            dest = arg if arg < 128 else arg - 256
            taken = addr + 2 + dest
            fallthrough = addr + 2
            if opcode in (0x28, 0x38):
                result.add_branch(BranchType.TrueBranch, taken)
                result.add_branch(BranchType.FalseBranch, fallthrough)
            elif opcode in (0x20, 0x30):
                result.add_branch(BranchType.TrueBranch, fallthrough)
                result.add_branch(BranchType.FalseBranch, taken)
            else:
                result.add_branch(BranchType.UnconditionalBranch, taken)
        elif instr == 'JP':
            if opcode == 0xe9:
                # Register-indirect jump: the target is not known statically,
                # so report an indirect branch rather than a made-up address.
                result.add_branch(BranchType.IndirectBranch)
            else:
                arg = struct.unpack('<H', data[1:3])[0]
                fallthrough = addr + 3
                if opcode in (0xca, 0xda):
                    result.add_branch(BranchType.TrueBranch, arg)
                    result.add_branch(BranchType.FalseBranch, fallthrough)
                elif opcode in (0xc2, 0xd2):
                    result.add_branch(BranchType.TrueBranch, fallthrough)
                    result.add_branch(BranchType.FalseBranch, arg)
                else:
                    result.add_branch(BranchType.UnconditionalBranch, arg)
        elif instr == 'RET':
            result.add_branch(BranchType.FunctionReturn)
        elif instr == 'CALL':
            result.add_branch(BranchType.CallDestination,
                              struct.unpack("<H", data[1:3])[0])
        return result

    def get_token(self, mnemonic, operand, data):
        """Build the display token for one operand.

        Args:
            mnemonic: Instruction mnemonic as found in the opcode table.
            operand: Operand template, e.g. ``d8``, ``(a16)`` or a register.
            data: Raw instruction bytes the immediate is read from.

        Returns:
            A single ``InstructionTextToken``.
        """
        if re.search(r'(d|r|a)8', operand) is not None:
            value = data[1]
            if re.match(r'(d|r|a)8', operand) is not None:
                # Bare 8-bit immediate.
                return InstructionTextToken(
                    InstructionTextTokenType.IntegerToken, "0x%.2x" % value,
                    value)
            if re.match(r'\(a8\)', operand) is not None:
                # ``(a8)`` is shown as an address in the 0xff00 page.
                return InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    "0xff%.2x" % value, value | 0xff00)
            return InstructionTextToken(
                InstructionTextTokenType.PossibleAddressToken,
                "0x%.4x" % value, value)

        if re.search(r'(d|r|a)16', operand) is not None:
            value = struct.unpack('<H', data[1:3])[0]
            if re.match(r'(d|r|a)16', operand) is None:
                # Wrapped forms such as ``(a16)`` are memory references.
                return InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    "0x%.4x" % value, value)
            if mnemonic == "CALL":
                return InstructionTextToken(
                    InstructionTextTokenType.DataSymbolToken,
                    "sub_%x" % value, value)
            return InstructionTextToken(
                InstructionTextTokenType.IntegerToken, "0x%.4x" % value,
                value)

        # Everything else is rendered as a register name.
        return InstructionTextToken(InstructionTextTokenType.RegisterToken,
                                    operand.lower())

    def perform_get_instruction_text(self, data, addr):
        """Produce the disassembly tokens for the instruction at *addr*.

        Returns:
            ``(tokens, length)``, or ``None`` when the instruction cannot be
            decoded (including empty or truncated *data*).
        """
        instr, length, operands, _flags, _value = self.decode_instruction(
            data, addr)
        if instr is None:
            return None
        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 instr.lower())
        ]
        if len(operands) >= 1:
            # Pad the mnemonic so operands line up in a fixed column.
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.OperandSeparatorToken,
                    ''.rjust(8 - len(instr))))
            tokens.append(self.get_token(instr, operands[0], data))
            if len(operands) == 2:
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.OperandSeparatorToken, ', '))
                tokens.append(self.get_token(instr, operands[1], data))
        return tokens, length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; always returns ``None``."""
        return None
Beispiel #12
0
class SN8F2288(Architecture):
    """Disassembly-only architecture plugin named ``SN8F2288``.

    Every instruction is one little-endian 16-bit word
    (``max_instr_length`` bytes).  Decoding relies on the module-level
    ``opcode_dict`` table and ``disassemble`` helper; lifting to low-level
    IL is not implemented.
    """
    name = "SN8F2288"
    endianness = Endianness.LittleEndian
    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 2
    regs = {
        "STKP": RegisterInfo("STKP", 1),
        "A": RegisterInfo("A", 1),
        "R": RegisterInfo("R", 1),
        "Z": RegisterInfo("Z", 1),
        "Y": RegisterInfo("Y", 1),
        "PFLAG": RegisterInfo("PFLAG", 1),
        "RBANK": RegisterInfo("RBANK", 1)
    }
    stack_pointer = "STKP"
    flags = []
    flag_write_types = []
    flag_roles = {}
    flags_required_for_flag_condition = {}
    flags_written_by_flag_write_type = {}

    def perform_get_instruction_info(self, data, addr):
        """Describe the length and branches of the instruction at *addr*.

        Returns:
            An ``InstructionInfo``, or ``None`` when *data* is too short,
            *addr* is odd, or the opcode is not in ``opcode_dict``.

        Raises:
            KeyError: If the table yields a jump action that is not one of
                ``NONXT``, ``NEXTI``, ``BRNCH``, ``JUMPI`` or ``CALLI``.
        """
        if len(data) < 2:
            # Single-argument call form works as a statement on Python 2 and
            # as a function on Python 3.
            print("perform_get_instruction_info(%s, 0x%04x), not enough data!"
                  % (tohex(data), addr))
            return None
        if addr % 2 != 0:
            print("perform_get_instruction_info(%s, 0x%04x), address not aligned!"
                  % (tohex(data), addr))
            return None
        info = InstructionInfo()
        info.length = self.max_instr_length

        # workaround for a Binary Ninja bug, data is not guaranteed to be max_instr_length bytes
        data = data[:self.max_instr_length]

        instruction = unpack('<H', data)[0]
        bincode = instruction >> 8
        # The number of significant opcode bits depends on the high byte.
        if bincode >= 0x80:
            opcode_key = bincode & 0xc0
        elif bincode >= 0x40:
            opcode_key = bincode & 0xf8
        else:
            opcode_key = bincode
        try:
            mask, _opspace, jump_action, _opcode, _caption = opcode_dict[
                opcode_key]
        except KeyError:
            return None  # TODO is it possible to get more information?

        operand = instruction & mask
        # ROM addresses are 16 bits of data per address, hence ``operand * 2``.
        branches_by_action = {
            NONXT: [(BranchType.FunctionReturn, 0)],
            NEXTI: [],
            BRNCH: [(BranchType.TrueBranch, addr + 4),
                    (BranchType.FalseBranch, addr + 2)],
            JUMPI: [(BranchType.UnconditionalBranch, operand * 2)],
            CALLI: [(BranchType.CallDestination, operand * 2)],
        }

        for branch_type, target in branches_by_action[jump_action]:
            info.add_branch(branch_type, target)
        return info

    def perform_get_instruction_text(self, data, addr):
        """Produce the disassembly tokens for the instruction at *addr*.

        Returns:
            ``(tokens, length)``, or ``None`` when *data* is too short or
            *addr* is odd.
        """
        if len(data) < 2:
            print("perform_get_instruction_text(%s, 0x%04x), not enough data!"
                  % (tohex(data), addr))
            return None
        if addr % 2 != 0:
            print("perform_get_instruction_text(%s, 0x%04x), address not aligned!"
                  % (tohex(data), addr))
            # Same failure value as the short-data path above.
            return None

        # workaround for a Binary Ninja bug, data is not guaranteed to be max_instr_length bytes
        data = data[:self.max_instr_length]

        instruction = unpack('<H', data)[0]
        # Floor division keeps the word address an integer on Python 3 too.
        tokens = disassemble(addr // 2, instruction)
        return tokens, self.max_instr_length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; always returns ``None``."""
        return None
Beispiel #13
0
class Z80(Architecture):
    """Architecture description named ``Z80``.

    Only the basic sizes, the register table and the stack pointer are
    declared here; no decode, text or lifting methods are defined in this
    class body.
    """
    name = 'Z80'

    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 4

    # Register table.
    # NOTE(review): the three-argument RegisterInfo form looks like
    # (containing register, size, offset) - confirm against the RegisterInfo
    # definition. 'Flags' / 'Flags_' are declared with size 0; verify that
    # this is intended.
    regs = {
        # main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        # alternate registers
        'AF_': RegisterInfo('AF_', 2),
        'BC_': RegisterInfo('BC_', 2),
        'DE_': RegisterInfo('DE_', 2),
        'HL_': RegisterInfo('HL_', 2),

        # main registers (sub): each entry refers to one of the 16-bit
        # registers declared above
        'A': RegisterInfo('AF', 1, 1),
        'B': RegisterInfo('BC', 1, 1),
        'C': RegisterInfo('BC', 1, 0),
        'D': RegisterInfo('DE', 1, 1),
        'E': RegisterInfo('DE', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'L': RegisterInfo('HL', 1, 0),
        'Flags': RegisterInfo('AF', 0),

        # alternate registers (sub): same layout, on the '_' register set
        'A_': RegisterInfo('AF_', 1, 1),
        'B_': RegisterInfo('BC_', 1, 1),
        'C_': RegisterInfo('BC_', 1, 0),
        'D_': RegisterInfo('DE_', 1, 1),
        'E_': RegisterInfo('DE_', 1, 0),
        'H_': RegisterInfo('HL_', 1, 1),
        'L_': RegisterInfo('HL_', 1, 0),
        'Flags_': RegisterInfo('AF_', 0),

        # index registers
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'SP': RegisterInfo('SP', 2),

        # other registers
        'I': RegisterInfo('I', 1),
        'R': RegisterInfo('R', 1),

        # program counter
        'PC': RegisterInfo('PC', 2),

        # status
        'status': RegisterInfo('status', 1)
    }

    # Must name a key of ``regs``.
    stack_pointer = "SP"
Beispiel #14
0
class LR35902(Architecture):
    """Disassembly-only architecture plugin named ``LR35902``.

    The opcode table is loaded from ``opcodes.json`` next to this module and
    holds two sub-tables, ``unprefixed`` and ``cbprefixed``, keyed by the
    opcode formatted with ``'%#x'``.  Lifting to low-level IL is not
    implemented.
    """
    name = 'LR35902'
    address_size = 2        # 16-bit addresses
    default_int_size = 1    # 1-byte integers
    instr_alignment = 1     # no instruction alignment
    max_instr_length = 3    # maximum length (opcodes 1-2, operands 0-2 bytes)

    regs = {
        # Main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        'SP': RegisterInfo('SP', 2),
        'PC': RegisterInfo('PC', 2),

        # Sub registers
        'A': RegisterInfo('AF', 1, 1),
        'Flags': RegisterInfo('AF', 0),
        'B': RegisterInfo('BC', 1, 1),
        'C': RegisterInfo('BC', 1, 0),
        'D': RegisterInfo('DE', 1, 1),
        'E': RegisterInfo('DE', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'L': RegisterInfo('HL', 1, 0),
    }

    flags = ["z", "n", "h", "c"]
    flag_write_types = ["*", "czn", "zn"]
    flag_roles = {
        'z': FlagRole.ZeroFlagRole,
        'n': FlagRole.NegativeSignFlagRole,
        'h': FlagRole.HalfCarryFlagRole,
        'c': FlagRole.CarryFlagRole,
    }
    flags_written_by_flag_write_type = {
        "*": ["c", "z", "h", "n"],
        "czn": ["c", "z", "n"],
        "zn": ["z", "n"],
    }

    stack_pointer = "SP"

    # Result of ``_decode_instruction`` when nothing could be decoded.
    INVALID_INS = (None, None, None, None, None)
    conditions_strings = ['C', 'NC', 'Z', 'NZ']
    bit_instructions = ['BIT', 'RES', 'SET']

    # address -> name
    IO_REGISTERS = {
        0xFF00: "P1",
        0xFF01: "SB",
        0xFF02: "SC",
        0xFF04: "DIV",
        0xFF05: "TIMA",
        0xFF06: "TMA",
        0xFF07: "TAC",
        0xFF0F: "IF",
        0xFF10: "NR10",
        0xFF11: "NR11",
        0xFF12: "NR12",
        0xFF13: "NR13",
        0xFF14: "NR14",
        0xFF16: "NR21",
        0xFF17: "NR22",
        0xFF18: "NR23",
        0xFF19: "NR24",
        0xFF1A: "NR30",
        0xFF1B: "NR31",
        0xFF1C: "NR32",
        0xFF1D: "NR33",
        0xFF1E: "NR34",
        0xFF20: "NR41",
        0xFF21: "NR42",
        0xFF22: "NR43",
        0xFF23: "NR44",
        0xFF24: "NR50",
        0xFF25: "NR51",
        0xFF26: "NR52",

        0xFF30: "WAV0",
        0xFF31: "WAV1",
        0xFF32: "WAV2",
        0xFF33: "WAV3",
        0xFF34: "WAV4",
        0xFF35: "WAV5",
        0xFF36: "WAV6",
        0xFF37: "WAV7",
        0xFF38: "WAV8",
        0xFF39: "WAV9",
        0xFF3A: "WAVA",
        0xFF3B: "WAVB",
        0xFF3C: "WAVC",
        0xFF3D: "WAVD",
        0xFF3E: "WAVE",
        0xFF3F: "WAVF",

        0xFF40: "LCDC",
        0xFF41: "STAT",
        0xFF42: "SCY",
        0xFF43: "SCX",
        0xFF44: "LY",
        0xFF45: "LYC",
        0xFF46: "DMA",
        0xFF47: "BGP",
        0xFF48: "OBP0",
        0xFF49: "OBP1",
        0xFF4A: "WY",
        0xFF4B: "WX",
        0xFF4D: "KEY1",
        0xFF4F: "VBK",
        0xFF51: "HDMA1",
        0xFF52: "HDMA2",
        0xFF53: "HDMA3",
        0xFF54: "HDMA4",
        0xFF55: "HDMA5",
        0xFF56: "RP",
        0xFF68: "BCPS",
        0xFF69: "BCPD",
        0xFF6A: "OCPS",
        0xFF6B: "OCPD",
        0xFF70: "SVBK",
        0xFFFF: "IE",
    }

    def __init__(self):
        """Initialise the base class and load ``opcodes.json``."""
        Architecture.__init__(self)

        basepath = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(basepath, 'opcodes.json')) as fin:
            self.opcodes = json.load(fin)

    def _get_io_register(self, addr):
        """Return the symbolic name for the I/O address *addr*.

        Addresses in ``0xFF80 .. 0xFFFE`` are named ``HRAM_xx``; anything
        else is looked up in ``IO_REGISTERS``.

        Raises:
            KeyError: If *addr* has no entry in ``IO_REGISTERS``.
        """
        if 0xFF80 <= addr < 0xFFFF:
            return f'HRAM_{addr-0xFF80:02X}'
        return self.IO_REGISTERS[addr]

    def _decode_instruction(self, data: bytes, addr: int):
        """Decode the instruction at the start of *data*.

        Args:
            data: Raw bytes beginning at the instruction.
            addr: Address of the instruction (unused by the decoder).

        Returns:
            ``(mnemonic, length, operands, flags, value)``; *value* is the
            second byte for 2-byte instructions, the little-endian 16-bit
            word for 3-byte instructions, otherwise ``None``.
            ``INVALID_INS`` is returned for empty or truncated data and for
            opcodes missing from the table.
        """
        if len(data) == 0:
            return self.INVALID_INS
        if data[0] == 0xCB:
            # Two-byte opcode: the second byte selects the instruction.
            if len(data) < 2:
                return self.INVALID_INS
            ins_entry = self.opcodes['cbprefixed'].get('%#x' % data[1], None)
        else:
            ins_entry = self.opcodes['unprefixed'].get('%#x' % data[0], None)

        if not ins_entry:
            return self.INVALID_INS

        ins_length = ins_entry['length']
        if len(data) < ins_length:
            # Truncated instruction: refuse to decode instead of failing
            # below with IndexError / struct.error on the missing bytes.
            return self.INVALID_INS

        ins_operands = [
            ins_entry[key] for key in ('operand1', 'operand2')
            if key in ins_entry
        ]
        ins_flags = [f.lower() for f in ins_entry['flags']]
        if ins_length == 2:
            ins_value = data[1]
        elif ins_length == 3:
            ins_value = struct.unpack('<H', data[1:3])[0]
        else:
            ins_value = None

        return ins_entry['mnemonic'], ins_length, ins_operands, ins_flags, ins_value

    def _get_token(self, mnemonic: str, operand: str, data: bytes, addr: int, instruction_length: int):
        """Turn one operand template into a list of display tokens.

        The template is split on ``(``, ``)``, ``+`` and ``-``; each piece
        ("atom") becomes one token.

        Args:
            mnemonic: Instruction mnemonic from the opcode table.
            operand: Operand template, e.g. ``d8``, ``(a16)``, ``SP+r8``.
            data: Raw instruction bytes the immediates are read from.
            addr: Address of the instruction.
            instruction_length: Length of the instruction in bytes, used to
                resolve relative (``r8``) targets.

        Returns:
            A list of ``InstructionTextToken``.

        Raises:
            ValueError: For an ``a8`` offset without a known I/O register
                name, an unbalanced ``)``, or an unrecognised atom.
        """
        if mnemonic == 'STOP':
            return [InstructionTextToken(InstructionTextTokenType.TextToken, '0')]
        if mnemonic == 'RST':
            # The template starts with two hex digits giving the target.
            value = bytes.fromhex(operand[:2])[0]
            return [InstructionTextToken(InstructionTextTokenType.AddressDisplayToken, f"irs_usr{value//8}", value)]

        result = []
        depth = 0
        atoms = [t for t in re.split(r'([()\+\-])', operand) if t]
        bit_indices = [str(x) for x in range(8)]

        for atom in atoms:
            if atom == 'd8':
                value = data[1]
                result.append(InstructionTextToken(
                    InstructionTextTokenType.PossibleValueToken, f'{value:#04x}', value))
            elif atom == 'd16':
                value = struct.unpack('<H', data[1:3])[0]
                result.append(InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken, f'{value:#06x}', value))
            elif atom == 'a8':
                # 8-bit offset into the 0xFF00 page.
                value = struct.unpack('<B', data[1:2])[0]
                io_addr = 0xFF00 + value
                try:
                    io_name = self._get_io_register(io_addr)
                except KeyError as err:
                    # Only the failed lookup is translated; unrelated errors
                    # are no longer masked by a bare ``except``.
                    raise ValueError(
                        f'Invalid IO register offset {value} in {mnemonic} {atoms} at addr {addr:#0x}') from err
                result.append(InstructionTextToken(
                    InstructionTextTokenType.DataSymbolToken, io_name, io_addr))
            elif atom == 'a16':
                value = struct.unpack('<H', data[1:3])[0]
                result.append(InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken, f'{value:#06x}', value))
            elif atom == 'r8':
                value = struct.unpack('<b', data[1:2])[0]
                if atoms[0] == 'SP':  # SP+r8: show the raw signed offset
                    result.append(InstructionTextToken(
                        InstructionTextTokenType.PossibleAddressToken, f'{value:#04x}', value))
                else:  # r8: show the resolved branch target
                    result.append(InstructionTextToken(InstructionTextTokenType.PossibleAddressToken,
                                                       f'{addr+value+instruction_length:#06x}', addr+instruction_length+value))
            elif atom == '(':
                depth += 1
                result.append(InstructionTextToken(
                    InstructionTextTokenType.BeginMemoryOperandToken, atom))
            elif atom == ')':
                depth -= 1
                if depth < 0:
                    raise ValueError(f'Unbalanced parenthesis in {atoms}')
                result.append(InstructionTextToken(
                    InstructionTextTokenType.EndMemoryOperandToken, atom))
            elif atom in ('+', '-'):
                result.append(InstructionTextToken(
                    InstructionTextTokenType.TextToken, atom))
            elif atom in self.conditions_strings and mnemonic in ['CALL', 'RET', 'JR', 'JP']:
                # Checked before registers: 'C' is both a condition and a
                # register name.
                result.append(InstructionTextToken(
                    InstructionTextTokenType.TextToken, atom))
            elif atom in self.regs:
                result.append(InstructionTextToken(
                    InstructionTextTokenType.RegisterToken, atom))
            elif mnemonic in self.bit_instructions and atom in bit_indices:
                result.append(InstructionTextToken(
                    InstructionTextTokenType.TextToken, atom))
            else:
                raise ValueError(
                    f'Unrecognized atom {atom} in {atoms} for instruction {mnemonic}')

        return result

    def get_instruction_info(self, data: bytes, addr: int):
        """Describe the length and branches of the instruction at *addr*.

        Returns:
            An ``InstructionInfo`` with branches added for ``JR``, ``JP``,
            ``RET``, ``RETI`` and ``CALL``, or ``None`` when the instruction
            cannot be decoded.
        """
        ins_mnem, ins_len, _, _, _ = self._decode_instruction(data, addr)
        if not ins_mnem:
            return None

        result = InstructionInfo()
        result.length = ins_len
        ins_end = addr + ins_len

        opcode = data[0]
        if ins_mnem == 'JR':
            offset = struct.unpack('<b', data[1:2])[0]
            taken = ins_end + offset
            if opcode in (0x28, 0x38):
                result.add_branch(BranchType.TrueBranch, taken)
                result.add_branch(BranchType.FalseBranch, ins_end)
            elif opcode in (0x20, 0x30):
                result.add_branch(BranchType.TrueBranch, ins_end)
                result.add_branch(BranchType.FalseBranch, taken)
            else:
                result.add_branch(BranchType.UnconditionalBranch, taken)
        elif ins_mnem == 'JP':
            if opcode == 0xe9:
                result.add_branch(BranchType.IndirectBranch)
            else:
                arg = struct.unpack('<H', data[1:3])[0]
                if opcode in (0xca, 0xda):
                    result.add_branch(BranchType.TrueBranch, arg)
                    result.add_branch(BranchType.FalseBranch, ins_end)
                elif opcode in (0xc2, 0xd2):
                    result.add_branch(BranchType.TrueBranch, ins_end)
                    result.add_branch(BranchType.FalseBranch, arg)
                else:
                    result.add_branch(BranchType.UnconditionalBranch, arg)
        elif ins_mnem in ('RET', 'RETI'):
            result.add_branch(BranchType.FunctionReturn)
        elif ins_mnem == 'CALL':
            result.add_branch(BranchType.CallDestination,
                              struct.unpack("<H", data[1:3])[0])
        return result

    def get_instruction_text(self, data, addr):
        """Produce the disassembly tokens for the instruction at *addr*.

        Returns:
            ``(tokens, length)``, or ``None`` when the instruction cannot be
            decoded.

        Raises:
            ValueError: Propagated from ``_get_token``.
        """
        ins_mnem, ins_len, operands, _, _ = self._decode_instruction(
            data, addr)
        if ins_mnem is None:
            return None

        tokens = [InstructionTextToken(
            InstructionTextTokenType.InstructionToken, ins_mnem.lower())]
        if len(operands) >= 1:
            # Pad the mnemonic so operands line up in a fixed column.
            tokens.append(InstructionTextToken(
                InstructionTextTokenType.IndentationToken, ''.rjust(8 - len(ins_mnem))))
            tokens += self._get_token(ins_mnem,
                                      operands[0], data, addr, ins_len)
            if len(operands) == 2:
                tokens.append(InstructionTextToken(
                    InstructionTextTokenType.OperandSeparatorToken, ', '))
                tokens += self._get_token(ins_mnem,
                                          operands[1], data, addr, ins_len)
        return tokens, ins_len

    def get_instruction_low_level_il(self, data, addr, il: LowLevelILFunction):
        """Lifting is not implemented; always returns ``None``."""
        return None
Beispiel #15
0
class Z80(Architecture):
    """Disassembly-only architecture plugin named ``Z80``.

    Instruction length and text come from ``skwrapper.disasm``; the text is
    split into atoms that are classified into display tokens.  Branch
    information and lifting are not implemented.
    """
    name = 'Z80'

    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 4

    # register related stuff
    regs = {
        # main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        # alternate registers
        'AF_': RegisterInfo('AF_', 2),
        'BC_': RegisterInfo('BC_', 2),
        'DE_': RegisterInfo('DE_', 2),
        'HL_': RegisterInfo('HL_', 2),

        # main registers (sub)
        'A': RegisterInfo('AF', 1, 1),
        'B': RegisterInfo('BC', 1, 1),
        'C': RegisterInfo('BC', 1, 0),
        'D': RegisterInfo('DE', 1, 1),
        'E': RegisterInfo('DE', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'L': RegisterInfo('HL', 1, 0),
        'Flags': RegisterInfo('AF', 0),

        # alternate registers (sub)
        'A_': RegisterInfo('AF_', 1, 1),
        'B_': RegisterInfo('BC_', 1, 1),
        'C_': RegisterInfo('BC_', 1, 0),
        'D_': RegisterInfo('DE_', 1, 1),
        'E_': RegisterInfo('DE_', 1, 0),
        'H_': RegisterInfo('HL_', 1, 1),
        'L_': RegisterInfo('HL_', 1, 0),
        'Flags_': RegisterInfo('AF_', 0),

        # index registers
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'SP': RegisterInfo('SP', 2),

        # other registers
        'I': RegisterInfo('I', 1),
        'R': RegisterInfo('R', 1),

        # program counter
        'PC': RegisterInfo('PC', 2),

        # status
        'status': RegisterInfo('status', 1)
    }

    stack_pointer = "SP"

    # internal: spellings recognised in the disassembler's text output
    cond_strs = ['C', 'NC', 'Z', 'NZ', 'M', 'P', 'PE', 'PO']
    reg8_strs = list('ABDHCELIR') + [
        'A\'', 'B\'', 'C\'', 'D\'', 'E\'', 'H\'', 'L\'', 'Flags', 'Flags\'',
        'IXh', 'IXl', 'IYh', 'IYl'
    ]
    reg16_strs = [
        'AF', 'BC', 'DE', 'HL', 'AF', 'AF\'', 'BC\'', 'DE\'', 'HL\'', 'IX',
        'IY', 'SP', 'PC'
    ]
    reg_strs = reg8_strs + reg16_strs

    def get_instruction_info(self, data, addr):
        """Report the length of the instruction at *addr*.

        Returns:
            An ``InstructionInfo`` carrying only the length, or ``None``
            when the disassembler reports a length of 0.
        """
        (instrTxt, instrLen) = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None
        result = InstructionInfo()
        result.length = instrLen
        return result

    def get_instruction_text(self, data, addr):
        """Tokenise the disassembly text of the instruction at *addr*.

        Returns:
            ``(tokens, length)``, or ``None`` when the disassembler reports
            a length of 0.

        Raises:
            Exception: If the text contains an atom that is not recognised.
        """
        (instrTxt, instrLen) = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None

        # Delimiters are kept because they sit in a capture group.
        atoms = [t for t in re.split(r'([, ()\+])', instrTxt) if t]
        mnemonic = atoms[0]
        result = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 mnemonic)
        ]
        if atoms[1:]:
            result.append(
                InstructionTextToken(InstructionTextTokenType.TextToken, ' '))

        for atom in atoms[1:]:
            if not atom or atom == ' ':
                continue
            # PROBLEM: cond 'C' conflicts with register C
            # eg: "RET C" is it "RET <reg>" or "RET <cc>" ?
            # eg: "CALL C" is it "CALL <reg>" or "CALL C,$0000" ?
            # None of these mnemonics takes register C as an operand, so a
            # bare 'C' after them is the carry condition (JP/JR included).
            elif atom == 'C' and mnemonic in ['CALL', 'RET', 'JP', 'JR']:
                # flag, condition code
                result.append(
                    InstructionTextToken(InstructionTextTokenType.TextToken,
                                         atom))
            elif atom in self.reg16_strs or atom in self.reg8_strs:
                result.append(
                    InstructionTextToken(
                        InstructionTextTokenType.RegisterToken, atom))
            elif atom in self.cond_strs:
                result.append(
                    InstructionTextToken(InstructionTextTokenType.TextToken,
                                         atom))
            elif atom[0] == '#':
                # '#' prefixes a hexadecimal immediate.
                result.append(
                    InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                         atom, int(atom[1:], 16)))
            elif atom[0] == '$':
                # '$' prefixes a hexadecimal value; four digits are treated
                # as an address, anything else as a plain integer.
                if len(atom) == 5:
                    result.append(
                        InstructionTextToken(
                            InstructionTextTokenType.PossibleAddressToken,
                            atom, int(atom[1:], 16)))
                else:
                    result.append(
                        InstructionTextToken(
                            InstructionTextTokenType.IntegerToken, atom,
                            int(atom[1:], 16)))
            elif atom.isdigit():
                result.append(
                    InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                         atom, int(atom)))
            elif atom == '(':
                result.append(
                    InstructionTextToken(
                        InstructionTextTokenType.BeginMemoryOperandToken,
                        atom))
            elif atom == ')':
                result.append(
                    InstructionTextToken(
                        InstructionTextTokenType.EndMemoryOperandToken, atom))
            elif atom == '+':
                result.append(
                    InstructionTextToken(InstructionTextTokenType.TextToken,
                                         atom))
            elif atom == ',':
                result.append(
                    InstructionTextToken(
                        InstructionTextTokenType.OperandSeparatorToken, atom))
            else:
                # Report the offending atom; the previous code referenced an
                # undefined name here and died with NameError instead.
                raise Exception('unfamiliar token: %s from instruction %s' %
                                (atom, instrTxt))

        return result, instrLen

    def get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; always returns ``None``."""
        return None
Beispiel #16
0
class Chip8(Architecture):
    """Architecture plugin named ``CHIP-8`` with fixed 2-byte instructions.

    Decoding and tokenising are delegated to a ``Disassembler`` instance;
    lifting to low-level IL is not implemented.
    """
    name = 'CHIP-8'
    endianness = Endianness.BigEndian
    address_size = 2
    default_int_size = 2
    instr_alignment = 2
    max_instr_length = 2
    opcode_display_length = 2
    regs = {
        'PC': RegisterInfo('PC', 2),
        'SP': RegisterInfo('SP', 1),
        'I': RegisterInfo('I', 2),
        'DT': RegisterInfo('DT', 1),
        'ST': RegisterInfo('ST', 1),
    }
    # General-purpose registers V0..V9, Va..Vf, added in that order.
    regs.update({
        'V%s' % digit: RegisterInfo('V%s' % digit, 1)
        for digit in '0123456789abcdef'
    })
    stack_pointer = 'SP'

    def __init__(self):
        """Initialise the base class and create the disassembler."""
        super().__init__()
        self.dis = Disassembler()

    def get_instruction_info(self, data, addr):
        """Report instruction length (always 2) and branch targets."""
        data = data[:2]
        info = InstructionInfo()
        info.length = 2
        decoded = self.dis._vars(data)
        target = decoded['addr']
        kind = self.dis.get_branch_info(data)
        if kind in (BranchType.UnconditionalBranch,
                    BranchType.CallDestination):
            info.add_branch(kind, target)
        elif kind in (BranchType.FunctionReturn, BranchType.IndirectBranch):
            info.add_branch(kind)
        elif kind in (BranchType.TrueBranch, BranchType.FalseBranch):
            # Both conditional kinds are reported identically: the taken
            # path skips one instruction, the other falls through.
            info.add_branch(BranchType.TrueBranch, addr + 4)
            info.add_branch(BranchType.FalseBranch, addr + 2)
        return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, 2)`` for the instruction at *addr*.

        When the disassembler yields nothing, the two raw bytes are shown
        as an ``_emit`` pseudo-instruction.
        """
        data = data[:2]
        tokens = self.dis.disasm(data, addr)
        if tokens:
            return tokens, 2

        first, second = data[0], data[1]
        fallback = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 '_emit'),
            InstructionTextToken(InstructionTextTokenType.TextToken, ' '),
            InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                 hex(first), first),
            InstructionTextToken(
                InstructionTextTokenType.OperandSeparatorToken, ', '),
            InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                 hex(second), second),
        ]
        return fallback, 2

    def get_instruction_low_level_il(self, data, addr, il):
        """TODO: implement a lifter; currently always returns ``None``."""
        return None
Beispiel #17
0
class M6502(Architecture):
    """Binary Ninja architecture description for the 6502.

    Decoding is table driven (``InstructionNames``, ``InstructionOperandTypes``,
    ``OperandLengths``); text and IL generation are delegated to the
    ``OperandTokens``, ``OperandIL`` and ``InstructionIL`` tables.

    All byte access goes through ``_byte_at`` / slices so the class works
    whether ``data`` is a Python 2 ``str`` or a Python 3 ``bytes`` object
    (indexing ``bytes`` yields ``int``, which breaks ``ord(data[0])`` and
    comparisons against one-character strings).
    """

    name = "6502"
    address_size = 2
    default_int_size = 1
    max_instr_length = 3
    regs = {
        "a": RegisterInfo("a", 1),
        "x": RegisterInfo("x", 1),
        "y": RegisterInfo("y", 1),
        "s": RegisterInfo("s", 1)
    }
    stack_pointer = "s"
    flags = ["c", "z", "i", "d", "b", "v", "s"]
    flag_write_types = ["*", "czs", "zvs", "zs"]
    flag_roles = {
        # Not a normal carry flag, subtract result is inverted
        "c": FlagRole.SpecialFlagRole,
        "z": FlagRole.ZeroFlagRole,
        "v": FlagRole.OverflowFlagRole,
        "s": FlagRole.NegativeSignFlagRole
    }
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_UGE: ["c"],
        LowLevelILFlagCondition.LLFC_ULT: ["c"],
        LowLevelILFlagCondition.LLFC_E: ["z"],
        LowLevelILFlagCondition.LLFC_NE: ["z"],
        LowLevelILFlagCondition.LLFC_NEG: ["s"],
        LowLevelILFlagCondition.LLFC_POS: ["s"]
    }
    flags_written_by_flag_write_type = {
        "*": ["c", "z", "v", "s"],
        "czs": ["c", "z", "s"],
        "zvs": ["z", "v", "s"],
        "zs": ["z", "s"]
    }

    # First-byte values the branch patch helpers accept. Inverting a branch
    # flips bit 0x20, which pairs 0x10/0x30, 0x50/0x70, 0x90/0xb0, 0xd0/0xf0.
    _BRANCH_OPCODES = frozenset(
        (0x10, 0x30, 0x50, 0x70, 0x90, 0xb0, 0xd0, 0xf0))
    # First-byte value the skip-and-return patches accept (3-byte form only).
    _CALL_OPCODE = 0x20
    # Single-byte filler used when instructions are patched out.
    _NOP = b"\xea"

    @staticmethod
    def _byte_at(data, index):
        """Return byte ``index`` of ``data`` as an int, or None if absent."""
        chunk = bytearray(data[index:index + 1])
        return chunk[0] if chunk else None

    def _is_branch(self, data):
        """Return True when ``data`` starts with a patchable branch opcode."""
        return self._byte_at(data, 0) in self._BRANCH_OPCODES

    def _is_patchable_call(self, data):
        """Return True when ``data`` is exactly a 3-byte call instruction."""
        return len(data) == 3 and self._byte_at(data, 0) == self._CALL_OPCODE

    def decode_instruction(self, data, addr):
        """Decode the instruction starting at ``data[0]``.

        Returns ``(instr, operand, length, value)`` where ``instr`` is the
        mnemonic, ``operand`` the operand type, ``length`` the total byte
        length and ``value`` the decoded operand (None for operand-less
        instructions, the wrapped 16-bit target for relative operands).
        Returns four Nones when ``data`` is empty, the opcode has no table
        entry, or ``data`` is shorter than the instruction.
        """
        opcode = self._byte_at(data, 0)
        if opcode is None:
            return None, None, None, None
        instr = InstructionNames[opcode]
        if instr is None:
            return None, None, None, None

        operand = InstructionOperandTypes[opcode]
        operand_length = OperandLengths[operand]
        length = 1 + operand_length
        if len(data) < length:
            return None, None, None, None

        if operand_length == 0:
            value = None
        elif operand == REL:
            # Signed displacement relative to the next instruction.
            value = (addr + 2 + struct.unpack("b", data[1:2])[0]) & 0xffff
        elif operand_length == 1:
            value = self._byte_at(data, 1)
        else:
            value = struct.unpack("<H", data[1:3])[0]

        return instr, operand, length, value

    def perform_get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` with length and branches, or None."""
        instr, operand, length, value = self.decode_instruction(data, addr)
        if instr is None:
            return None

        result = InstructionInfo()
        result.length = length
        if instr == "jmp":
            if operand == ADDR:
                result.add_branch(BranchType.UnconditionalBranch,
                                  struct.unpack("<H", data[1:3])[0])
            else:
                result.add_branch(BranchType.UnresolvedBranch)
        elif instr == "jsr":
            result.add_branch(BranchType.CallDestination,
                              struct.unpack("<H", data[1:3])[0])
        elif instr in ("rti", "rts"):
            result.add_branch(BranchType.FunctionReturn)
        elif instr in ("bcc", "bcs", "beq", "bmi", "bne", "bpl", "bvc", "bvs"):
            dest = (addr + 2 + struct.unpack("b", data[1:2])[0]) & 0xffff
            result.add_branch(BranchType.TrueBranch, dest)
            result.add_branch(BranchType.FalseBranch, addr + 2)
        return result

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for display, or None if undecodable."""
        instr, operand, length, value = self.decode_instruction(data, addr)
        if instr is None:
            return None

        tokens = [
            # "@" characters in table mnemonics are stripped for display.
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 "%-7s " % instr.replace("@", ""))
        ]
        tokens += OperandTokens[operand](value)
        return tokens, length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Append the IL for one instruction to ``il``.

        Returns the instruction length, or None if it cannot be decoded.
        """
        instr, operand, length, value = self.decode_instruction(data, addr)
        if instr is None:
            return None

        operand = OperandIL[operand](il, value)
        instr = InstructionIL[instr](il, operand)
        # A lifter may return one expression, a list of them, or None.
        if isinstance(instr, list):
            for i in instr:
                il.append(i)
        elif instr is not None:
            il.append(instr)

        return length

    def perform_is_never_branch_patch_available(self, data, addr):
        """Return True when ``data`` starts with a patchable branch."""
        return self._is_branch(data)

    def perform_is_invert_branch_patch_available(self, data, addr):
        """Return True when ``data`` starts with a patchable branch."""
        return self._is_branch(data)

    def perform_is_always_branch_patch_available(self, data, addr):
        """Always-branch patching is not offered."""
        return False

    def perform_is_skip_and_return_zero_patch_available(self, data, addr):
        """Return True when ``data`` is exactly a 3-byte call."""
        return self._is_patchable_call(data)

    def perform_is_skip_and_return_value_patch_available(self, data, addr):
        """Return True when ``data`` is exactly a 3-byte call."""
        return self._is_patchable_call(data)

    def perform_convert_to_nop(self, data, addr):
        """Return ``len(data)`` filler bytes (0xea)."""
        return self._NOP * len(data)

    def perform_never_branch(self, data, addr):
        """Replace a branch with filler bytes; None if not a branch."""
        if self._is_branch(data):
            return self._NOP * len(data)
        return None

    def perform_invert_branch(self, data, addr):
        """Flip bit 0x20 of a branch opcode; None if not a branch."""
        if self._is_branch(data):
            inverted = self._byte_at(data, 0) ^ 0x20
            return struct.pack("B", inverted) + data[1:]
        return None

    def perform_skip_and_return_value(self, data, addr, value):
        """Replace a 3-byte call with bytes 0xa9, ``value & 0xff``, 0xea.

        Returns None when ``data`` is not exactly a 3-byte call.
        """
        if not self._is_patchable_call(data):
            return None
        return b"\xa9" + struct.pack("B", value & 0xff) + self._NOP
Beispiel #18
0
class S1C88(Architecture):
    """Binary Ninja architecture description for the S1C88.

    Decoding is delegated to the module-level ``decode(data, addr)``. The
    methods below rely on it returning None on failure and otherwise a
    sequence of ``(tokens, info, il_funcs)``, where ``info`` has a
    ``length`` attribute and ``il_funcs`` is None or an iterable of
    callables that each take the IL function.
    """

    name = 's1c88:s1c88'
    address_size = 2
    max_instr_length = 8

    regs = {
        # 16-bit pairs followed by their low (offset 0) and high (offset 1)
        # byte sub-registers.
        'BA': RegisterInfo('BA', 2),
        'A': RegisterInfo('BA', 1, 0),
        'B': RegisterInfo('BA', 1, 1),
        'HL': RegisterInfo('HL', 2),
        'L': RegisterInfo('HL', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'PC': RegisterInfo('PC', 2),
        'SP': RegisterInfo('SP', 2),
        'BR': RegisterInfo('BR', 1),
        'NB': RegisterInfo('NB', 1),
        'CB': RegisterInfo('CB', 1),
        'EP': RegisterInfo('EP', 1),
        # IP is its own full-width register with XP/YP as its byte halves,
        # following the same pattern as BA and HL above.
        # NOTE(review): XP/YP byte offsets kept as originally written -
        # confirm which half is the low byte.
        'IP': RegisterInfo('IP', 2),
        'XP': RegisterInfo('IP', 1, 0),
        'YP': RegisterInfo('IP', 1, 1),
        'SC': RegisterInfo('SC', 1),
    }
    stack_pointer = 'SP'

    flags = ['z', 'c', 'v', 'n', 'd', 'u', 'i0', 'i1']

    flag_roles = {
        'z': FlagRole.ZeroFlagRole,
        'c': FlagRole.CarryFlagRole,
        'v': FlagRole.OverflowFlagRole,
        'n': FlagRole.NegativeSignFlagRole,
        'd': FlagRole.SpecialFlagRole,
        'u': FlagRole.SpecialFlagRole,
        'i0': FlagRole.SpecialFlagRole,
        'i1': FlagRole.SpecialFlagRole,
    }

    flags_required_for_flag_condition = {
        # Unsigned comparisons
        LowLevelILFlagCondition.LLFC_UGE: ['c'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
        # Signed comparisons
        LowLevelILFlagCondition.LLFC_SGE: ['n', 'v'],
        LowLevelILFlagCondition.LLFC_SGT: ['z', 'n', 'v'],
        LowLevelILFlagCondition.LLFC_SLE: ['z', 'n', 'v'],
        LowLevelILFlagCondition.LLFC_SLT: ['n', 'v'],
        # Equals or not
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        # Overflow or not
        LowLevelILFlagCondition.LLFC_NO: ['v'],
        LowLevelILFlagCondition.LLFC_O: ['v'],
        # Negative or not
        LowLevelILFlagCondition.LLFC_NEG: ['n'],
        LowLevelILFlagCondition.LLFC_POS: ['n']
    }

    # The original list began with `'' "*"` (no comma), which Python joins
    # into the single string "*"; the value below is therefore unchanged.
    flag_write_types = [
        "*",
        "zcvn",
        "zn",
        "z",
        "zcn",
    ]

    flags_written_by_flag_write_type = {
        "*": ['z', 'c', 'v', 'n', 'd', 'u', 'i0', 'i1'],
        "zcvn": ["z", "c", "v", "n"],
        "zn": ["z", "n"],
        "z": ["z"],
        "zcn": ["z", "c", "n"],
    }

    def get_instruction_info(self, data, addr):
        """Return the decoder's info object, or None if decoding fails."""
        r = decode(data, addr)
        if r is None:
            return None
        return r[1]

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)``, or None if decoding fails."""
        r = decode(data, addr)
        if r is None:
            return None
        return r[0], r[1].length

    def get_instruction_low_level_il(self, data, addr, il):
        """Run the decoder's IL callables against ``il``.

        Returns the instruction length, or None if decoding fails. When the
        decoder supplies no callables, nothing is emitted but the length is
        still returned.
        """
        r = decode(data, addr)
        if r is None:
            return None

        fn = r[2]
        if fn is not None:
            for f in fn:
                f(il)

        return r[1].length
class BPFArch(Architecture):
    """Binary Ninja architecture description for BPF.

    Every valid instruction is reported with a length of 8 bytes. Decoding
    is done by ``get_instruction``; branch info, text formatting and lifting
    are looked up per opcode in ``InstructionInfoModders``,
    ``InstructionNames`` / ``InstructionFormatters`` and ``InstructionLLIL``.
    """

    name = "BPF"
    address_size = 4
    default_int_size = 4
    max_instr_length = 8

    regs = {
        "a": RegisterInfo("a", 4),  # accumulator
        "x": RegisterInfo("x", 4),  # index
    }
    # BPF only has 16 memory slots to store to, and Binary Ninja has no
    # concept of separate address spaces, so M[0]..M[15] are modelled as
    # registers m0..m15.
    regs.update(
        ("m%d" % slot, RegisterInfo("m%d" % slot, 4)) for slot in range(16))
    regs.update({
        # Packet accesses go through a virtual register that notionally
        # holds the address of the packet start at program entry (again
        # because there is no separate address space).
        "pkt": RegisterInfo("pkt", 4),
        # Virtual register that notionally holds the packet size at
        # program entry.
        "len": RegisterInfo("len", 4),
        # Binary Ninja needs a stack or is unhappy.
        "dummystack": RegisterInfo("dummystack", 4),
        # Virtual register that makes the return value explicit in LLIL.
        "dummyret": RegisterInfo("dummyret", 4),
        # Virtual link register to return to; a BPF return is more akin
        # to a halt.
        "dummylr": RegisterInfo("dummylr", 4),
    })
    # A stack pointer is mandatory even though BPF has no stack.
    stack_pointer = "dummystack"

    def perform_get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` of length 8, or None if invalid."""
        valid, instr = get_instruction(data, addr)
        if not valid:
            # Returning None on failure is essential: anything else for
            # invalid data breaks things in creative ways.
            return None
        info = InstructionInfo()
        info.length = 8
        opcode = instr.opcode
        if opcode in InstructionInfoModders:
            # Opcode-specific hook fills in branch information.
            InstructionInfoModders[opcode](info, instr)
        return info

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, 8)`` for display, or None if invalid."""
        valid, instr = get_instruction(data, addr)
        if not valid:
            # Returning None on failure is essential: anything else for
            # invalid data breaks things in creative ways.
            return None
        opcode = instr.opcode
        if opcode not in InstructionNames:
            log('debug: %s' % instr)
            placeholder = InstructionTextToken(
                InstructionTextTokenType.InstructionToken,
                "unk opcode 0x%x" % opcode)
            return ([placeholder], 8)
        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 InstructionNames[opcode])
        ]
        operand_tokens = InstructionFormatters[opcode](instr)
        if operand_tokens:
            separator = InstructionTextToken(
                InstructionTextTokenType.TextToken, " ")
            tokens += [separator] + operand_tokens
        return tokens, 8

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Append the IL for one instruction to ``il``.

        Returns 8 on success, or None if the bytes do not decode. Opcodes
        without a lifter are emitted as ``il.undefined()``.
        """
        log('Asking to decode %d bytes at 0x%x' % (len(data), addr))
        valid, instr = get_instruction(data[0:8], addr)
        if not valid:
            log('*********** Tried an failed **********')
            # Returning None on failure is essential: anything else for
            # invalid data breaks things in creative ways.
            return None
        opcode = instr.opcode
        if opcode in InstructionLLIL:
            expr = InstructionLLIL[opcode](il, instr)
            if expr is None:
                log('Failed to generate il')
            else:
                il.append(expr)
                log('appended: %s' % LowLevelILInstruction(il, expr.index))
        else:
            log('Adding il.undefined()')
            il.append(il.undefined())

        log('Full IL Decode was successful len(il): %d' % len(il))
        return 8
Beispiel #20
0
class Z80(Architecture):
    """Binary Ninja architecture description for the Z80.

    Disassembly is delegated to ``skwrapper.disasm(data, addr)``, which
    returns ``(text, length)``; a length of 0 is treated as a failed decode.
    No lifting is implemented.
    """

    name = 'Z80'

    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 4

    # register related stuff
    regs = {
        # main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        # alternate registers
        'AF_': RegisterInfo('AF_', 2),
        'BC_': RegisterInfo('BC_', 2),
        'DE_': RegisterInfo('DE_', 2),
        'HL_': RegisterInfo('HL_', 2),

        # main registers (sub)
        'A': RegisterInfo('AF', 1, 1),
        'B': RegisterInfo('BC', 1, 1),
        'C': RegisterInfo('BC', 1, 0),
        'D': RegisterInfo('DE', 1, 1),
        'E': RegisterInfo('DE', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'L': RegisterInfo('HL', 1, 0),
        'Flags': RegisterInfo('AF', 0),

        # alternate registers (sub)
        'A_': RegisterInfo('AF_', 1, 1),
        'B_': RegisterInfo('BC_', 1, 1),
        'C_': RegisterInfo('BC_', 1, 0),
        'D_': RegisterInfo('DE_', 1, 1),
        'E_': RegisterInfo('DE_', 1, 0),
        'H_': RegisterInfo('HL_', 1, 1),
        'L_': RegisterInfo('HL_', 1, 0),
        'Flags_': RegisterInfo('AF_', 0),

        # index registers
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'SP': RegisterInfo('SP', 2),

        # other registers
        'I': RegisterInfo('I', 1),
        'R': RegisterInfo('R', 1),

        # program counter
        'PC': RegisterInfo('PC', 2),

        # status
        'status': RegisterInfo('status', 1)
    }

    stack_pointer = "SP"

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` carrying only the length.

        Returns None when the disassembler reports a length of 0. No branch
        information is recorded.
        """
        (_, instrLen) = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None
        result = InstructionInfo()
        result.length = instrLen
        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` with the text as a single token.

        Returns None when the disassembler reports a length of 0, matching
        ``get_instruction_info`` instead of handing back a zero-length
        instruction.
        """
        (instrTxt, instrLen) = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None
        tokens = [InstructionTextToken(InstructionTextTokenType.TextToken, instrTxt)]
        return tokens, instrLen

    def get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; always returns None."""
        return None