def translate(self, target):
    """Translate the code segments of *target* into a flat list of IR nodes.

    For every segment flagged as code, and for every entry point of the
    target, 4-byte words are fetched with ``target.memory.getrange`` and fed
    to ``self.disassemble`` until ``addr + 4`` reaches ``seg.end`` or the
    disassembler raises ``KeyError``.

    ``self.disassemble`` is expected to return either a string (wrapped in
    an ``ir.unhandled_instruction`` stamped with the current address) or a
    sequence whose element ``[1]`` is the list of IR nodes for the word.

    Branch delay slots are reordered: a word containing a node of type
    jump/call/ret/branch_true is held back, the following word (its delay
    slot) is emitted first, and the two groups swap their ``address``
    values so the emitted stream stays address-ordered.

    target -> object exposing ``memory.segments``, ``memory.getrange`` and
              ``entry_points``

    Returns the list of IR nodes in emission order.
    """
    branch_types = ("jump", "call", "ret", "branch_true")
    output = []

    for seg in target.memory.segments:
        if not seg.code:
            continue

        for addr in target.entry_points:
            # IR of a branch waiting for its delay slot, or None
            pending_branch = None

            while addr + 4 < seg.end:
                try:
                    IR = self.disassemble(target.memory.getrange(addr, addr + 4), addr)
                except KeyError as e:
                    # single-argument form prints the same text on Python 2 and 3
                    print("finishing early due to invalid disassembly %s" % (e,))
                    break

                if isinstance(IR, str):
                    instrs = [ir.unhandled_instruction(IR)]
                    instrs[0].address = addr
                else:
                    instrs = IR[1]

                if pending_branch is not None:
                    # `instrs` is the delay slot of the queued branch: emit it
                    # first, then the branch.
                    #XXX even worse than normal code alert, flip the addresses also
                    slot_address = instrs[0].address
                    branch_address = pending_branch[0].address
                    for n in instrs:
                        n.address = branch_address
                    for n in pending_branch:
                        n.address = slot_address
                    output += instrs
                    output += pending_branch
                    # The delay slot is never re-queued, even if it contains a
                    # branch itself: it has already been emitted, and queueing
                    # it again would emit it twice.
                    pending_branch = None
                elif any(n.type in branch_types for n in instrs):
                    #MIPS requires delay slot re-ordering for branches:
                    #wait until next time around to add the branch
                    pending_branch = instrs
                else:
                    output += instrs

                addr += 4

            if pending_branch is not None:
                # The walk stopped (end of segment or invalid disassembly)
                # before the delay slot was decoded; emit the branch as-is
                # rather than silently losing it.
                output += pending_branch

    return output
def makeIR(self, instruction, size, operands, TMEM_IR, OPmode, addr):
    """
    Build the list of IR nodes for one decoded x86 instruction.

    instruction -> dictionary of instruction information (only
                   instruction['mnemonic'] is read here)
    size        -> added to EIP for the pushed return address of CALL and
                   to relative CALL/JMP/Jcc displacements (presumably the
                   encoded instruction length -- confirm against caller)
    operands    -> list of operand type and value; operands[i][0] is the
                   type string, operands[i][1] the operand object
    TMEM_IR     -> IR for complex memory access (falsy when the
                   instruction has no such operand)
    OPmode      -> forwarded to self.DR() for most stack-pointer accesses
    addr        -> not used by this method

    When TMEM_IR is given, the returned list is the instruction IR wrapped
    with the memory-access IR:
      * preload   -> TMEM_IR + load(TMEM, TVAL) is placed before the IR
      * poststore -> TMEM_IR + store(tval, TMEM) is placed after the IR
      * neither   -> TMEM_IR is placed before the IR
    Otherwise the bare instruction IR is returned.  Mnemonics without a
    translation yield a single ir.unhandled_instruction.
    """
    m = instruction['mnemonic']

    preload = False
    poststore = False

    #figure out if TMEM_IR is LOAD or STORE
    if TMEM_IR:
        #first operand is destination
        if 'reg/mem' in operands[0][0]:
            preload = True
            poststore = True
        elif 'reg/mem' in operands[1][0]:
            preload = True
        elif 'moffset' in operands[1][0]:
            preload = True

    IR = []

    if m == "ADC":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '+', operands[1][1], '+', self.DR("CF"))]
    elif m == "ADD":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '+', operands[1][1])]
    elif m == "AND":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '&', operands[1][1])]
    elif m == "CALL":
        # the call target is only read, never written back
        poststore = False
        # push the return address (EIP + size)
        IR = [ir.operation(self.DR("TVAL"), '=', self.DR("EIP"), "+", ir.constant_operand(size)),
              ir.operation(self.DR("ESP", OPmode), '=', self.DR("ESP"), '-', ir.constant_operand(4)),
              ir.store(self.DR("TVAL"), self.DR("ESP", OPmode))]
        #absolute jump vs relative jump
        if 'rel' in operands[0][0]:
            IR += [ir.operation(self.DR("tval"), '=', self.DR("EIP"), "+", operands[0][1], '+', ir.constant_operand(size)),
                   ir.call(self.DR("tval"), relative=True)]
        else:
            IR += [ir.call(operands[0][1])]
        #controversial... analyzer must know callee _should_ do this
        #IR += [ir.operation(self.DR('esp'),'=',self.DR('esp'),'+',ir.constant_operand(4))]
    elif m == "CLC":
        IR = [ir.operation(self.DR("CF"), '=', ir.constant_operand(0))]
    elif m == "CLD":
        IR = [ir.operation(self.DR("DF"), '=', ir.constant_operand(0))]
    elif m == "CMP":
        # comparison only: nothing is written back to memory
        poststore = False
        IR = [ir.operation(operands[0][1], '-', operands[1][1])]
    elif m == "DEC":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '-', ir.constant_operand(1))]
    elif m == "INC":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '+', ir.constant_operand(1))]
    elif m == "IMUL":
        #XXXXX TODO FIX ME
        #EDX:EAX = a*b
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '*', operands[1][1])]
        if operands[0][1].register_name == 'eax':
            #TODO SIZE
            # NOTE(review): this reads operands[0] after the node above has
            # already overwritten it with the product -- left as-is, see TODO.
            IR += [ir.operation(self.DR("EDX"), '=', '(', operands[0][1], '*', operands[1][1], ')', '>>', ir.constant_operand(32))]
    elif m == "JMP":
        poststore = False
        #absolute jump vs relative jump
        if isinstance(operands[0][1], ir.constant_operand):
            DEST = ir.constant_operand(int(size + operands[0][1].value))
        else:
            DEST = operands[0][1]

        if 'rel' in operands[0][0]:
            IR += [ir.jump(DEST, relative=True)]
        else:
            IR += [ir.jump(DEST)]
    elif 'J' == m[0]:
        # Conditional jumps: one node holding the flag predicate, followed
        # by a branch_true to the destination.  Mnemonics not listed below
        # get no predicate node.
        #IR = [ir.operation(self.DR("tval"),'=',self.DR("EIP"),"+",operands[0][1])]
        DEST = ir.constant_operand(int(size + operands[0][1].value))
        IR = []

        if m == "JO":
            IR += [ir.operation(self.DR('OF'), '==', ir.constant_operand(1))]
        elif m == "JNO":
            IR += [ir.operation(self.DR('OF'), '==', ir.constant_operand(0))]
        elif m == "JB":
            IR += [ir.operation(self.DR('CF'), '==', ir.constant_operand(1))]
        elif m == "JNC":
            IR += [ir.operation(self.DR('CF'), '==', ir.constant_operand(0))]
        elif m == "JBE":
            IR += [ir.operation(self.DR('ZF'), '==', ir.constant_operand(1), '||', self.DR("CF"), '==', ir.constant_operand(1))]
        elif m == "JNBE":
            IR += [ir.operation(self.DR('ZF'), '==', ir.constant_operand(0), '&&', self.DR("CF"), '==', ir.constant_operand(0))]
        elif m == "JS":
            IR += [ir.operation(self.DR('SF'), '==', ir.constant_operand(1))]
        elif m == "JNS":
            IR += [ir.operation(self.DR('SF'), '==', ir.constant_operand(0))]
        elif m == "JP":
            IR += [ir.operation(self.DR('PF'), '==', ir.constant_operand(1))]
        elif m == "JNP":
            IR += [ir.operation(self.DR('PF'), '==', ir.constant_operand(0))]
        elif m == "JL":
            IR += [ir.operation(self.DR('SF'), '!=', self.DR('OF'))]
        elif m == "JNL":
            IR += [ir.operation(self.DR('SF'), '==', self.DR('OF'))]
        elif m == "JLE":
            IR += [ir.operation(self.DR('ZF'), '==', ir.constant_operand(1), '||', self.DR("SF"), '!=', self.DR("OF"))]
        elif m == "JNLE":
            IR += [ir.operation(self.DR('ZF'), '==', ir.constant_operand(0), '&&', self.DR("SF"), '==', self.DR("OF"))]
        elif m == "JNZ":
            # taken when ZF is clear (same ZF convention as JNBE/JNLE above)
            IR += [ir.operation(self.DR('ZF'), '==', ir.constant_operand(0))]
        elif m == "JZ":
            # taken when ZF is set (same ZF convention as JBE/JLE above)
            IR += [ir.operation(self.DR('ZF'), '==', ir.constant_operand(1))]

        if 'rel' in operands[0][0]:
            IR += [ir.branch_true(DEST)]
        else:
            IR += [ir.branch_true(operands[0][1])]
    elif m == "LEA":
        # LEA only wants the computed address, no memory access at all
        preload = False
        poststore = False
        IR = [ir.operation(operands[0][1], '=', self.DR("TMEM"))]
    elif m == "LEAVE":
        # mov esp, ebp
        # pop ebp
        IR = [ir.operation(self.DR("ESP"), '=', self.DR("EBP")),
              ir.load(self.DR("ESP"), self.DR("EBP")),
              ir.operation(self.DR("ESP", OPmode), '=', self.DR("ESP", OPmode), "+", ir.constant_operand(4))]
    elif m == "MOV":
        # A plain store does not need the old memory value: keep the
        # preload only when the source is a moffset or the tval register.
        if preload and 'moffset' not in operands[1][0]:
            if operands[1][1].type != 'register' or operands[1][1].register_name != 'tval':
                preload = False

        if operands[0][1].type == 'register' and operands[0][1].register_name == 'tval':
            # destination is memory: emit only the source; the assignment to
            # tval is added by the post-store step at the end of this method
            IR = [ir.operation(operands[1][1])]
        else:
            IR = [ir.operation(operands[0][1], '=', operands[1][1])]
    elif m == "MOVSX":
        #XXXXXX TODO sign extend
        IR = [ir.operation(operands[0][1], '=', operands[1][1])]
    elif m == "MOVZX":
        # zero extension keeps exactly the bits of the source operand:
        # 16 bits for a 16-bit source, 8 bits otherwise
        if '16' in operands[1][0]:
            mask = 0xffff
        else:
            mask = 0xff
        IR = [ir.operation(operands[0][1], '=', operands[1][1], '&', ir.constant_operand(mask))]
    elif m == "NOT":
        IR = [ir.operation(operands[0][1], '=', '~', operands[0][1])]
    elif m == "OR":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '|', operands[1][1])]
    elif m == "POP":
        #TODO 0x20b6: pushl 0xff5b(%eax) no handled correctly
        IR = [ir.load(self.DR("ESP", OPmode), operands[0][1]),
              ir.operation(self.DR("ESP", OPmode), '=', self.DR("ESP", OPmode), "+", ir.constant_operand(4))]
    elif m == "PUSH":
        if isinstance(operands[0][1], str):
            # operand was not decoded into an operand object
            IR = [ir.unhandled_instruction(instruction['mnemonic'])]
        else:
            IR = [ir.operation(self.DR("ESP", OPmode), '=', self.DR("ESP", OPmode), "-", ir.constant_operand(4)),
                  ir.store(operands[0][1], self.DR("ESP", OPmode))]
            if operands[0][1].type == 'register' and operands[0][1].register_name != 'tval':
                poststore = False
    elif m == "RET":
        #pop eip
        preload = True
        IR = [ir.load(self.DR("ESP", OPmode), self.DR('TVAL')),
              ir.operation(self.DR('ESP', OPmode), '=', self.DR("ESP", OPmode), '+', ir.constant_operand(4)),
              ir.ret(self.DR('TVAL'))]
    elif m == "ROL":
        #XXX TODO FIX sz here
        sz = operands[0][1].size
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '<<', operands[1][1], '|', operands[0][1], '>>', '(', ir.constant_operand(sz), '-', operands[1][1], ')')]
    elif m == "ROR":
        sz = operands[0][1].size
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '>>', operands[1][1], '|', operands[0][1], '<<', '(', ir.constant_operand(sz), '-', operands[1][1], ')')]
    elif m == "SAL":
        # SAL is the same operation as SHL
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '<<', operands[1][1])]
    elif m == "SAR":
        #XXX TODO arithmetic shift: the sign fill is not expressed here,
        # this is the same node shape as SHR
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '>>', operands[1][1])]
    elif m == "SHL":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '<<', operands[1][1])]
    elif m == "SHR":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '>>', operands[1][1])]
    elif m == "SUB":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '-', operands[1][1])]
    elif m == "TEST":
        IR = [ir.operation(operands[0][1], '&', operands[1][1])]
    elif m == "XCHG":
        # xchg eax, eax is the canonical NOP
        have_nop = (operands[0][1].type == 'register' and
                    operands[1][1].type == 'register' and
                    operands[0][1].register_name == "eax" and
                    operands[1][1].register_name == "eax")
        #TODO does not play well with TMEM
        if have_nop:
            IR = [ir.operation("NOP")]
        else:
            #XXXXX TODO TMEM
            IR = [ir.operation(self.DR("tval"), '=', operands[0][1]),
                  ir.operation(operands[0][1], '=', operands[1][1]),
                  ir.operation(operands[1][1], '=', self.DR("TVAL"))]
    elif m == "XOR":
        IR = [ir.operation(operands[0][1], '=', operands[0][1], '^', operands[1][1])]
    else:
        IR = [ir.unhandled_instruction(instruction['mnemonic'])]

    if not TMEM_IR:
        return IR

    # Wrap the instruction IR with the memory-access IR.
    out = []
    if preload:
        out += TMEM_IR + [ir.load(self.DR("TMEM"), self.DR("TVAL"))]
    elif not poststore:
        out += TMEM_IR

    if poststore:
        if IR[0].type == 'operation':
            #XXX implicit load store hack
            # a bare single-operand operation (see MOV) becomes "tval = <operand>"
            if len(IR[0].ops) == 1:
                IR = [ir.operation(self.DR('tval'), '=', *(IR[0].ops))]
        out += IR + TMEM_IR + [ir.store(self.DR("tval"), self.DR('TMEM'))]
    else:
        out += IR
    return out