def evaluate_lines(instr, lines, in_str):
    """Emulate a basic block and evaluate where execution goes next.

    A fresh ``mapper()`` is created, every element of ``lines`` is executed
    on it through ``line.amoco(machine)``, and the final value of ``rip`` is
    handed to ``evaluate``.

    Args:
        instr: object whose ``symbols`` attribute provides ``find_symbol``
            and ``find_symbols``; also forwarded to ``evaluate``.
        lines: iterable of instruction lines (``opname``, ``bytelen``,
            ``amoco``, ``section`` and ``offset`` attributes are read).
        in_str: forwarded unchanged to ``evaluate``.

    Returns:
        ``(msg, val)`` as produced by ``evaluate`` when it yields a usable
        value; otherwise ``((reason, item, machine_dump), [None])`` where
        ``machine_dump`` is the sorted list of lines of ``str(machine)``.
    """
    state = mapper()

    def dump(state):
        return sorted(str(state).split("\n"))

    def give_up(reason, item):
        # Uniform failure value: diagnostic triple plus a [None] result.
        return ((reason, item, dump(state)), [None])

    # Emulation with amoco is too slow:
    # - for 'movsd', tests depending on 'df' are not simplified, therefore
    #   there are too many nested tests;
    # - for 'cmovXX', tests are not simplified either.
    # Hence the emulation is abandoned at the third occurrence of either.
    movsd_seen = 0
    cmov_seen = 0

    for insn in lines:
        # rip is kept symbolic
        # NOTE(review): subtracting bytelen presumably compensates for the
        # rip increment done by the emulated instruction -- confirm.
        if insn.opname in ('call', 'jmp'):
            state[env.rip] = env.rip
        else:
            state[env.rip] = env.rip - insn.bytelen

        if insn.opname == 'movsd':
            movsd_seen += 1
            if movsd_seen >= 3:
                return give_up('Emulated too slow (movsd)', insn)
        if insn.opname.startswith('cmov'):
            cmov_seen += 1
            if cmov_seen >= 3:
                return give_up('Emulated too slow (cmov)', insn)

        try:
            log.debug("EVAL %-20r %s", insn.amoco.bytes, insn)
            insn.amoco(state)
        except NotImplementedError:
            return give_up('Not implemented', insn)
        except NameError:
            return give_up('Cannot be emulated (name)', insn)
        except amoco.arch.core.InstructionError:
            return give_up('Cannot be emulated', insn)
        except TypeError:
            return give_up('Not callable', insn)

        if insn.opname == 'call':
            # amoco emulation pushes rip+i.length
            # we prefer to push the label of the next basic bloc
            next_label = instr.symbols.find_symbol(
                section=insn.section,
                address=insn.offset + insn.amoco.length)
            state[env.mem(env.rsp, cpu_addrsize)] = \
                expressions.lab(next_label, size=cpu_addrsize)

    retval = state[env.rip]
    msg, val = evaluate(retval, state, instr.symbols.find_symbols,
                        instr, in_str)
    if val is None:
        return give_up(str(retval.__class__), retval)
    if val == [None]:
        return give_up(msg, retval)
    return (msg, val)
def remove_pic_offset(e, pool):
    """Rewrite a PIC-relative expression so that it references labels directly.

    Handled shapes (``toto`` is a symbol name, ``cte`` a displacement):

    * a test expression: both branches are rewritten recursively;
    * ``M[M[M[PIC_OFFSET+toto@GOT]]+cte]``  =>  ``M[M[toto]+cte]``;
    * ``M[M[PIC_OFFSET+toto@GOTPCREL]+cte]`` =>  ``M[toto+cte]``;
    * ``M[PIC_OFFSET+toto@GOTPCREL]``        =>  ``toto``.

    Args:
        e: expression to rewrite.
        pool: symbol pool; ``pool.find_symbol(name=...)`` resolves labels.

    Returns:
        The rewritten expression, or ``None`` when ``e`` matches none of the
        shapes above or a sub-expression could not be rewritten.
    """
    log.debug("DETECT PIC FROM %s:%s", e.__class__.__name__, e)

    if e._is_tst:
        left = remove_pic_offset(e.l, pool)
        right = remove_pic_offset(e.r, pool)
        if left is None or right is None:
            return None
        return env.tst(e.tst, left, right)

    # Everything below only applies to memory expressions.
    if not e._is_mem:
        return None

    if e.a.base._is_mem:
        if e.a.base.a.disp == 0 and e.a.base.a.base._is_mem:
            # M32[M32[M32[PIC_OFFSET+toto@GOT]]+cte]
            # => M32[M32[toto]+cte]
            label = remove_pic_offset(e.a.base.a.base, pool)
            if label is None:
                return None
            return env.mem(env.mem(label), disp=e.a.disp)
        # M32[M32[PIC_OFFSET+toto@GOTPCREL]+cte]
        # => M32[toto+cte]
        label = remove_pic_offset(e.a.base, pool)
        if label is None:
            return None
        return env.mem(label, disp=e.a.disp)

    if not hasattr(e.a.disp, '_is_lab'):
        # Displacement is not an expression object: not handled yet.
        log.debug("BASE %s; DISP %s; TODO", e.a.base, e.a.disp)
        return None

    # M32[PIC_OFFSET+toto@GOTPCREL]
    # => toto
    if e.a.disp._is_lab and e.a.disp.ref.name.endswith('@GOTPCREL'):
        # Strip the 9-character '@GOTPCREL' suffix.
        label_name = e.a.disp.ref.name[:-9]
        pic_data = e.a.base
        if not check_pic_data(pic_data):
            # Marker kept verbatim: bare reference to a name defined outside
            # this block.
            # NOTE(review): presumably flags a path not exercised by the
            # non-regression tests -- confirm.
            NON_REGRESSION_FOUND
            log.debug("PIC OFFSET [%s] LABEL %s", pic_data, label_name)
            return None
        return env.lab(pool.find_symbol(name=label_name), size=cpu_addrsize)

    return None
def getModRM(obj, Mod, RM, data, REX=None):
    """Decode the operand selected by the Mod and R/M fields.

    Args:
        obj: instruction being decoded.  ``obj.misc`` supplies 'opdsz'
            (default 32), 'adrsz' (default 64) and 'segreg'; ``obj.bytes``
            is extended with every SIB/displacement byte consumed here.
        Mod: value of the mod field.
        RM: value of the r/m field.
        data: remaining undecoded bits (sliced by bit offsets; ``data.size``
            is compared against bit counts).
        REX: optional ``(W, R, X, B)`` tuple; when falsy it is obtained
            from ``getREX(obj)``.

    Returns:
        ``(operand, data)``: the register or memory operand, and the bits
        left after any SIB byte and displacement have been consumed.

    Raises:
        InstructionError: when ``data`` is too short for the SIB byte or
            the displacement.
    """
    opdsz = obj.misc['opdsz'] or 32
    adrsz = obj.misc['adrsz'] or 64
    seg = obj.misc['segreg']
    if seg is None:
        seg = ''
    W, _, X, B = REX or getREX(obj)
    if opdsz != 8 and W == 1:
        opdsz = 64
    # r/16/32/64 case:
    if Mod == 0b11:
        op1 = getregB(obj, RM, opdsz)
        return op1, data
    # SIB cases :
    if adrsz != 16 and RM == 0b100:
        # read SIB byte in data:
        if data.size < 8:
            raise InstructionError(obj)
        sib, data = data[0:8], data[8:data.size]
        # add sib byte:
        obj.bytes += pack(sib)
        # decode base & scaled index
        b = env.getreg((B << 3) + sib[0:3].int(), adrsz)
        i = env.getreg((X << 3) + sib[3:6].int(), adrsz)
        ss = 1 << (sib[6:8].int())
        # A stack-pointer index means "no index".
        s = i * ss if i.ref not in ('rsp', 'esp', 'sp') else 0
    else:
        s = 0
        if adrsz != 16:
            b = env.getreg((B << 3) + RM, adrsz)
        else:
            b = (env.bx + env.si,
                 env.bx + env.di,
                 env.bp + env.si,
                 env.bp + env.di,
                 env.si,
                 env.di,
                 env.bp,
                 env.bx)[RM]

    # check special disp32 case (RIP-relative addressing):
    if Mod == 0:
        if RM == 0b101:
            b = env.rip
            # Only default the segment when no override was recorded.
            # The isinstance guard keeps the comparison on plain strings
            # instead of relying on the identity of the '' literal.
            if isinstance(seg, str) and seg == '':
                seg = env.cs
            Mod = 0b10
        elif b.ref in ('rbp', 'r13'):
            # No base register: the base becomes a zero constant which
            # receives the displacement further down.
            b = s + env.cst(0, adrsz)
            s = 0
            Mod = 0b10

    # `s` is either the plain integer 0 (no index) or an expression.
    # Test it as a plain int rather than with `s is 0`, which depends on
    # small-integer caching.
    # NOTE(review): a bare `s == 0` is avoided because expression objects
    # look like they overload operators -- confirm.
    if isinstance(s, int) and s == 0:
        bs = b
    elif env.internals.get('keep_order'):
        # Instead of doing bs = b+s, which will reorder arguments, we do
        # the addition manually, and change 'prop' so the many future calls
        # to 'simplify' does not reorder the arguments
        bs = env.op('+', b, s)
        bs.prop |= 16
    else:
        bs = b + s

    # now read displacement bytes:
    if Mod == 0b00:
        d = 0
    elif Mod == 0b01:
        if data.size < 8:
            raise InstructionError(obj)
        d = data[0:8]
        data = data[8:data.size]
        obj.bytes += pack(d)
        d = d.signextend(adrsz).int(-1)
    elif Mod == 0b10:
        # Displacement has the address size, capped at 32 bits.
        immsz = adrsz
        if immsz == 64:
            immsz = 32
        if data.size < immsz:
            raise InstructionError(obj)
        d = data[0:immsz]
        obj.bytes += pack(d)
        data = data[immsz:data.size]
        d = d.int(-1)

    # A zero constant base absorbs the displacement.
    if bs._is_cst and bs.v == 0x0:
        bs.v = d
        bs.size = adrsz
        d = 0
    # opdsz is an int or the string 'mm'; equality is the correct test.
    if opdsz == 'mm':
        opdsz = 64
    return env.mem(bs, opdsz, seg, d), data
def getModRM(obj, Mod, RM, data):
    """Decode the operand selected by the Mod and R/M fields.

    Args:
        obj: instruction being decoded.  ``obj.misc`` supplies 'opdsz'
            (default 32), 'adrsz' (default 64), 'segreg' and 'REX';
            ``obj.bytes`` is extended with every SIB/displacement byte
            consumed here.
        Mod: value of the mod field.
        RM: value of the r/m field.
        data: remaining undecoded bits (sliced by bit offsets; ``data.size``
            is compared against bit counts).

    Returns:
        ``(operand, data)``: the register or memory operand, and the bits
        left after any SIB byte and displacement have been consumed.

    Raises:
        InstructionError: when ``data`` is too short for the SIB byte or
            the displacement.
    """
    opdsz = obj.misc['opdsz'] or 32
    adrsz = obj.misc['adrsz'] or 64
    seg = obj.misc['segreg']
    if seg is None:
        seg = ''
    REX = obj.misc['REX']
    if REX is None:
        W = X = B = 0
    else:
        W, _, X, B = REX
    if W == 1:
        opdsz = 64
    # r/16/32 case:
    if Mod == 0b11:
        op1 = env.getreg((B << 3) + RM, opdsz)
        return op1, data
    # m/16/32 case:
    if adrsz != 16 and RM == 0b100:
        # read SIB byte in data:
        if data.size < 8:
            raise InstructionError(obj)
        sib, data = data[0:8], data[8:data.size]
        # add sib byte:
        obj.bytes += pack(sib)
        # decode base & scaled index
        b = env.getreg((B << 3) + sib[0:3].int(), adrsz)
        i = env.getreg((X << 3) + sib[3:6].int(), adrsz)
        ss = 1 << (sib[6:8].int())
        # A stack-pointer index means "no index".
        s = i * ss if i.ref not in ('rsp', 'esp', 'sp') else 0
    else:
        s = 0
        if adrsz != 16:
            b = env.getreg((B << 3) + RM, adrsz)
        else:
            b = (env.bx + env.si,
                 env.bx + env.di,
                 env.bp + env.si,
                 env.bp + env.di,
                 env.si,
                 env.di,
                 env.bp,
                 env.bx)[RM]

    # check [disp16/32] case:
    # NOTE(review): these identity tests assume env.getreg returns the very
    # objects bound to env.rbp / env.r13 / env.bp -- confirm.
    if (b is env.rbp or b is env.r13) and Mod == 0:
        b = env.rip
        # Only default the segment when no override was recorded.  The
        # isinstance guard keeps the comparison on plain strings instead of
        # relying on the identity of the '' literal.
        if isinstance(seg, str) and seg == '':
            seg = env.cs
        Mod = 0b10
    if (b is env.bp) and Mod == 0:
        b = env.cst(0, adrsz)
        Mod = 0b10

    # now read displacement bytes:
    if Mod == 0b00:
        d = 0
    elif Mod == 0b01:
        if data.size < 8:
            raise InstructionError(obj)
        d = data[0:8]
        data = data[8:data.size]
        obj.bytes += pack(d)
        d = d.signextend(adrsz).int(-1)
    elif Mod == 0b10:
        # Displacement has the address size, capped at 32 bits.
        immsz = adrsz
        if immsz == 64:
            immsz = 32
        if data.size < immsz:
            raise InstructionError(obj)
        d = data[0:immsz]
        obj.bytes += pack(d)
        data = data[immsz:data.size]
        d = d.int(-1)

    bs = b + s
    # A zero constant base absorbs the displacement.
    if bs._is_cst and bs.v == 0x0:
        bs.v = d
        bs.size = adrsz
        d = 0
    return env.mem(bs, opdsz, seg, d), data
def array_detection(input, machine, find, instr, in_str):
    """Try to interpret ``input`` as the address of an array element and
    collect the labels stored in that array.

    The scaled-index sub-expression (``x * cst`` or ``x << n``) of ``input``
    is replaced IN PLACE by a symbolic ``index_in_array`` variable, so the
    caller's expression is modified.  The expression is then evaluated for
    successive indexes, stepping by the item length, until four indexes have
    failed to resolve.

    Args:
        input: address expression to analyse (mutated, see above).
        machine: mapper used to read memory at each candidate address.
        find: callable ``find(name=...)`` returning a sequence of symbols;
            used to resolve labels with an '@GOTOFF' suffix.
        instr: object providing ``symbols``; forwarded to the helpers.
        in_str: forwarded unchanged to ``deref_table``.

    Returns:
        ``('MEM_EXP - NOT AN ARRAY', [None])`` when no scaled index is found,
        ``(msg, 'TABLE')`` as soon as a helper reports ``'TABLE'``,
        ``('MEM_EXP', [None])`` when no label was collected, otherwise
        ``('ARRAY', labels)`` with labels in first-seen order, no duplicates.
    """
    log.debug("ARRAY_DETECT %s\n\t%s", input.__class__.__name__, input)
    dst_lst = []
    # Is it an element of an array?
    # Find the multiplication, replace it by 'index_in_array'
    index_var = env.ext('index_in_array', size=cpu_addrsize)
    item_len = 0
    if input.op.symbol == '+' and input.l._is_eqn:
        if input.l.op.symbol == '+' and input.l.l._is_eqn and \
                input.l.l.op.symbol == '*' and input.l.l.r._is_cst:
            item_len = int(input.l.l.r)
            input.l.l = index_var
        elif input.l.op.symbol == '+' and input.l.r._is_eqn and \
                input.l.r.op.symbol == '*' and input.l.r.r._is_cst:
            item_len = int(input.l.r.r)
            input.l.r = index_var
        elif input.l.op.symbol == '*' and input.l.r._is_cst:
            item_len = int(input.l.r)
            input.l = index_var
        elif input.l.op.symbol == '<<':
            item_len = 1 << int(input.l.r)
            input.l = index_var
    elif input.op.symbol == '+' and input.r._is_eqn:
        if input.r.op.symbol == '*' and input.r.r._is_cst:
            item_len = int(input.r.r)
            if input.r.l._is_ptr and input.r.l.disp == 0 and \
                    input.r.l.base._is_eqn and \
                    input.r.l.base.op.symbol == '+' and \
                    input.r.l.base.r._is_eqn and \
                    input.r.l.base.r.op.symbol == '*' \
                    and input.r.l.base.r.r._is_cst \
                    and input.r.l.base.l == input.r.l.base.r.l:
                # Marker kept verbatim (bare reference to an outside name).
                NON_REGRESSION_FOUND
                # The multiplied value is itself (x + x*k): total scale is
                # the outer constant times (1 + k).
                item_len *= 1 + int(input.r.l.base.r.r)
            input.r = index_var
        elif input.r.op.symbol == '<<':
            NON_REGRESSION_FOUND
            item_len = 1 << int(input.r.r)
            input.r = index_var
    if item_len == 0:
        msg = 'MEM_EXP - NOT AN ARRAY'
        return msg, [None]
    log.debug(" ARRAY of %d-byte items", item_len)
    # Usually 4-byte items
    invalid_indexes = 0
    index_in_array = -item_len
    # Stop once four indexes have failed to resolve.
    while invalid_indexes < 4:
        index_in_array += item_len
        m2 = mapper()
        m2[index_var] = env.cst(index_in_array, size=cpu_addrsize)
        address_in_array = input.eval(m2)
        log.debug(" x[%d] at %s:%s",
                  index_in_array // item_len,
                  address_in_array.__class__.__name__,
                  address_in_array)
        msg, val = 'NOT FOUND', None
        table, offset = expr.get_lab_imm(address_in_array)
        if offset is not None:
            msg, val = deref_table(table, offset, instr, in_str)
        if val is None:
            # The flag lives on the mapper class, so it must be reset even
            # when the memory read raises; otherwise it stays enabled for
            # every later mapper operation.
            mapper.assume_no_aliasing = True
            try:
                offset = machine.M(env.mem(address_in_array))
            finally:
                mapper.assume_no_aliasing = False
            offset = remove_got(offset, instr.symbols)
            v = expr.get_lab(offset)
            if v:
                msg, val = 'MEM', [v]
            table, offset = expr.get_lab_imm(expr.get_mem(offset, instr))
            if offset is not None:
                msg, val = deref_table(table, offset, instr, in_str)
        if val == 'TABLE':
            return msg, val
        if val in (None, [None]):
            log.debug(" ----> %s", msg)
            invalid_indexes += 1
            continue
        for label in val:
            if label.name.endswith('@GOTOFF'):
                NON_REGRESSION_FOUND
                # to make this work also with executables, we will need to
                # change our API and get the offset value that will have to
                # be subtracted; removing @GOTOFF is not enough!
                # ([:-7] strips the 7-character '@GOTOFF' suffix.)
                label = find(name=label.name[:-7])[0]
            log.debug(" => %s", label)
            if label not in dst_lst:
                dst_lst.append(label)
    if not dst_lst:
        return 'MEM_EXP', [None]
    return 'ARRAY', dst_lst