def func_read(self, expr_mem):
    """Memory read wrapper for symbolic execution.

    @expr_mem: ExprMem; the address is taken from expr_mem.arg.arg.arg

    Read expr_mem.size // 8 bytes from self.vmmngr at that address and
    return them as an ExprInt of expr_mem.size bits. The byte string is
    reversed before being converted, i.e. the first byte read ends up as
    the least significant one.
    """
    # Local import: keeps this block self-contained
    import binascii

    addr = expr_mem.arg.arg.arg
    # Floor division: the byte count must stay an integer even when true
    # division is active (Python 3 or `from __future__ import division`)
    size = expr_mem.size // 8
    value = self.vmmngr.get_mem(addr, size)
    # hexlify() gives the same digits as str.encode("hex") and also accepts
    # bytes objects
    return m2_expr.ExprInt_fromsize(expr_mem.size,
                                    int(binascii.hexlify(value[::-1]), 16))
def update_engine_from_cpu(cpu, exec_engine):
    """Load the values held by @cpu into the symbols of @exec_engine
    @cpu: JitCpu instance
    @exec_engine: symbexec instance

    Each ExprId symbol whose name is an attribute of @cpu is bound to that
    attribute value, wrapped in an ExprInt of the symbol size. ExprId
    symbols unknown to @cpu are left untouched.
    Raise NotImplementedError on a symbol which is not an ExprId.
    """
    for sym in exec_engine.symbols:
        # Only identifiers can be mapped onto CPU attributes
        if not isinstance(sym, m2_expr.ExprId):
            raise NotImplementedError("Type not handled: %s" % sym)
        if not hasattr(cpu, sym.name):
            continue
        exec_engine.symbols.symbols_id[sym] = m2_expr.ExprInt_fromsize(
            sym.size, getattr(cpu, sym.name))
def resolve_args_with_symbols(self, symbols=None):
    """Return a copy of self.args where known symbols are made concrete.

    @symbols: (optional) mapping name -> object exposing an `offset`
              attribute; defaults to an empty mapping

    For every ExprId found in an argument:
    - the special name '$' is replaced by self.get_asm_offset(id);
    - a name present in @symbols is replaced by an ExprInt holding its
      offset (0 when the offset is still None);
    - other names are left as they are.
    Each resulting argument goes through expr_simp.
    Raise ValueError when an id named by an asm_label is not in @symbols.
    """
    if symbols is None:
        symbols = {}
    resolved_args = []
    for arg in self.args:
        # try to resolve symbols using symbols (0 for default value)
        replacements = {}
        for expr_id in m2_expr.get_expr_ids(arg):
            if isinstance(expr_id.name, asmbloc.asm_label):
                name = expr_id.name.name
                if name not in symbols:
                    raise ValueError('unresolved symbol! %r' % expr_id)
            else:
                name = expr_id.name
            # special symbol
            if name == '$':
                replacements[expr_id] = self.get_asm_offset(expr_id)
                continue
            if name not in symbols:
                continue
            if symbols[name].offset is not None:
                size = expr_id.size
                if size is None:
                    # no size on the id: fall back on the symbol size
                    size = self.get_symbol_size(expr_id, symbols)
                concrete = m2_expr.ExprInt_fromsize(size,
                                                    symbols[name].offset)
            else:
                # default value
                concrete = m2_expr.ExprInt_fromsize(
                    self.get_symbol_size(expr_id, symbols), 0)
            replacements[expr_id] = concrete
        resolved_args.append(expr_simp(arg.replace_expr(replacements)))
    return resolved_args
def extract_ast_core(v, my_id2expr, my_int2expr):
    """Build an expression from the parsed tokens @v.

    @v: value handed to _extract_ast_core
    @my_id2expr: callable turning an identifier into an expression
    @my_int2expr: callable turning an integer into an expression; only used
                  as is when no identifier is present

    When all identifiers share a single size, integers are built as ExprInt
    of that size instead of going through @my_int2expr.
    Raise ValueError when the identifiers have different sizes.
    """
    tokens = _extract_ast_core(v)
    id_exprs = [my_id2expr(ident) for ident in ast_get_ids(tokens)]
    sizes = set(expr.size for expr in id_exprs)
    if len(sizes) > 1:
        raise ValueError('multiple sizes in ids')
    if sizes:
        size = sizes.pop()

        def _sized_int2expr(x):
            # integers follow the size shared by the identifiers
            return m2_expr.ExprInt_fromsize(size, x)

        my_int2expr = _sized_int2expr
    return ast_raw2expr(tokens, my_id2expr, my_int2expr)
def number(cls, size=32):
    """Return a random ExprInt of @size bits
    @size: (optional) number max bits

    The value is drawn from [0, cls.number_max % 2**size], both included.
    """
    upper_bound = cls.number_max % (2 ** size)
    drawn = random.randint(0, upper_bound)
    return m2_expr.ExprInt_fromsize(size, drawn)
def merge_sliceto_slice(args):
    """Merge contiguous pieces of a list of (expr, start, stop) entries.

    @args: iterable of 3-element sequences; item 0 is an expression, items
           1 and 2 are used as the start and stop bit positions

    - ExprInt entries whose ranges touch (stop of one == start of the next)
      are concatenated into a single ExprInt.
    - ExprSlice entries over the same source expression are merged when
      both their destination ranges and their source slice ranges touch.
    - Any other entry is passed through unchanged.

    Return the resulting entries as a list ordered by start position.
    Merged integers are returned as lists, merged slices as tuples.
    """
    # ExprSlice entries, grouped by the expression being sliced
    sources = {}
    # entries which are neither ExprInt nor ExprSlice, keyed by start
    non_slice = {}
    # ExprInt entries, keyed by start (a later entry with the same start
    # replaces the earlier one)
    sources_int = {}
    for a in args:
        if isinstance(a[0], m2_expr.ExprInt):
            # sources_int[a.start] = a
            # copy ExprInt because we will inplace modify arg just below
            # /!\ TODO XXX never ever modify inplace args...
            sources_int[a[1]] = (m2_expr.ExprInt_fromsize(
                a[2] - a[1], a[0].arg.__class__(a[0].arg)), a[1], a[2])
        elif isinstance(a[0], m2_expr.ExprSlice):
            if not a[0].arg in sources:
                sources[a[0].arg] = []
            sources[a[0].arg].append(a)
        else:
            non_slice[a[1]] = a
    # find max stop to determine size
    # NOTE(review): max_size is computed but never read afterwards
    max_size = None
    for a in args:
        if max_size is None or max_size < a[2]:
            max_size = a[2]
    # first simplify all num slices
    final_sources = []
    sorted_s = []
    for x in sources_int.values():
        x = list(x)
        # mask int
        v = x[0].arg & ((1 << (x[2] - x[1])) - 1)
        x[0] = m2_expr.ExprInt_from(x[0], v)
        x = tuple(x)
        sorted_s.append((x[1], x))
    # ascending sort: pop() below yields the entry with the highest start
    sorted_s.sort()
    while sorted_s:
        start, v = sorted_s.pop()
        out = [m2_expr.ExprInt(v[0].arg), v[1], v[2]]
        size = v[2] - v[1]
        while sorted_s:
            # the previous entry must end exactly where this one starts
            # NOTE(review): `start` is not refreshed after a merge here
            # (the slice loop below does refresh it), so this test keeps
            # comparing against the start of the first popped entry --
            # confirm this is intended
            if sorted_s[-1][1][2] != start:
                break
            s_start, s_stop = sorted_s[-1][1][1], sorted_s[-1][1][2]
            size += s_stop - s_start
            # current value goes to the high bits, previous entry to the
            # low bits
            a = m2_expr.mod_size2uint[size](
                (int(out[0].arg) << (out[1] - s_start)) +
                int(sorted_s[-1][1][0].arg))
            out[0] = m2_expr.ExprInt(a)
            sorted_s.pop()
            out[1] = s_start
        out[0] = m2_expr.ExprInt_fromsize(size, out[0].arg)
        final_sources.append((start, out))
    # keep the integer results aside: final_sources is reused below
    final_sources_int = final_sources
    # check if same sources have corresponding start/stop
    # is slice AND is sliceto
    simp_sources = []
    for args in sources.values():
        final_sources = []
        sorted_s = []
        for x in args:
            sorted_s.append((x[1], x))
        sorted_s.sort()
        while sorted_s:
            start, v = sorted_s.pop()
            # rebuild the slice from its source expression
            ee = v[0].arg[v[0].start:v[0].stop]
            out = ee, v[1], v[2]
            while sorted_s:
                # destination ranges must touch ...
                if sorted_s[-1][1][2] != start:
                    break
                # ... and so must the ranges inside the source expression
                if sorted_s[-1][1][0].stop != out[0].start:
                    break
                start = sorted_s[-1][1][1]
                # out[0].start = sorted_s[-1][1][0].start
                o_e, _, o_stop = out
                o1, o2 = sorted_s[-1][1][0].start, o_e.stop
                # single slice covering both source ranges
                o_e = o_e.arg[o1:o2]
                out = o_e, start, o_stop
                # update _size
                # out[0]._size = out[0].stop-out[0].start
                sorted_s.pop()
            out = out[0], start, out[2]
            final_sources.append((start, out))
        simp_sources += final_sources
    simp_sources += final_sources_int
    for i, v in non_slice.items():
        simp_sources.append((i, v))
    # order everything by start position, then drop the sort key
    simp_sources.sort()
    simp_sources = [x[1] for x in simp_sources]
    return simp_sources
def int2expr(self, v):
    """Convert integer @v to an ExprInt of self.intsize bits.

    Return None when @v has bits set outside of self.intmask.
    """
    overflow_bits = v & ~self.intmask
    if overflow_bits == 0:
        return m2_expr.ExprInt_fromsize(self.intsize, v)
    return None
def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
    """Parse an assembly listing and split it into blocs.

    @mnemo: architecture class; used through mnemo.fromstring() and
            mnemo.regs.all_regs_ids
    @attrib: architecture attribute, forwarded to mnemo.fromstring()
    @txt: assembly source, one statement per line (';' starts a comment)
    @symbol_pool: (optional) asm_symbol_pool receiving the labels; a new
                  one is created when None
    @gen_label_index: unused, kept for interface compatibility

    Handled directives: .text/.data/.bss (section switch), .string, .ascii,
    .ustring (raw data), the entries of `declarator` (sized data), .split
    and .dontsplit (bloc splitting control). .comm, .file, .intel_syntax,
    .globl, .local, .type, .size, .align, .ident, .section and .cfi_* are
    ignored.

    Return (blocs_sections, symbol_pool), where blocs_sections holds one
    list of asm_bloc per section, in the order text, data, bss.
    Raise ValueError on an unknown directive.

    Note: string directives rely on str.decode('string_escape'), which
    only exists on Python 2.
    """
    if symbol_pool is None:
        symbol_pool = asm_symbol_pool()

    lines_text = []
    lines_data = []
    lines_bss = []

    # current section; statements are appended to it until a section
    # directive is met
    lines = lines_text
    # parse each line
    for line in txt.split('\n'):
        # empty
        if re.match(r'\s*$', line):
            continue
        # comment
        if re.match(r'\s*;\S*', line):
            continue
        # labels to forget
        r = re.match(r'\s*\.LF[BE]\d\s*:', line)
        if r:
            continue
        # label beginning with .L
        r = re.match(r'\s*(\.L\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue
        # directive
        if re.match(r'\s*\.', line):
            r = re.match(r'\s*\.(\S+)', line)
            directive = r.groups()[0]
            if directive == 'text':
                lines = lines_text
                continue
            if directive == 'data':
                lines = lines_data
                continue
            if directive == 'bss':
                lines = lines_bss
                continue
            if directive in ['string', 'ascii']:
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # The payload lies between the first and the last double
                # quote. Looking for a single quote as closing delimiter
                # only worked through the -1 "not found" result and
                # truncated strings containing an apostrophe.
                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                raw = raw.decode('string_escape')
                if directive == 'string':
                    raw += "\x00"
                lines.append(asm_raw(raw))
                continue
            if directive == 'ustring':
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # same delimiter handling as .string / .ascii
                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                raw = raw.decode('string_escape')
                # each character is followed by a NUL byte
                raw = "".join(map(lambda x: x + '\x00', raw))
                lines.append(asm_raw(raw))
                continue
            if directive in declarator:
                data_raw = line[r.end():].split(' ', 1)[1]
                data_raw = data_raw.split(',')
                size = declarator[directive]
                data_int = []

                # parser
                variable, operand, base_expr = gen_base_expr()
                my_var_parser = parse_ast(
                    lambda x: m2_expr.ExprId(x, size),
                    lambda x: m2_expr.ExprInt_fromsize(size, x))
                base_expr.setParseAction(my_var_parser)

                for b in data_raw:
                    b = b.strip()
                    x = base_expr.parseString(b)[0]
                    data_int.append(x.canonize())
                # NOTE(review): `p` is never read; the lookup is kept so
                # that a size missing from size2pck still fails here
                p = size2pck[size]
                raw = data_int
                x = asm_raw(raw)
                x.element_size = size
                lines.append(x)
                continue
            if directive == 'comm':
                # TODO
                continue
            if directive == 'split':  # custom command
                x = asm_raw()
                # marker tested with hasattr() while building the blocs
                x.split = True
                lines.append(x)
                continue
            if directive == 'dontsplit':  # custom command
                lines.append(asm_raw(line.strip()))
                continue
            if directive in [
                    'file', 'intel_syntax', 'globl', 'local', 'type', 'size',
                    'align', 'ident', 'section'
            ]:
                continue
            if directive[0:4] == 'cfi_':
                continue

            raise ValueError("unknown directive %s" % str(directive))

        # label
        r = re.match(r'\s*(\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue

        # code
        if ';' in line:
            line = line[:line.find(';')]
        line = line.strip(' ').strip('\t')
        instr = mnemo.fromstring(line, attrib)
        if instr.dstflow():
            instr.dstflow2label(symbol_pool)
        lines.append(instr)

    log_asmbloc.info("___pre asm oki___")
    # make blocs

    blocs_sections = []
    bloc_num = 0
    for lines in [lines_text, lines_data, lines_bss]:
        # state 0: no current bloc, state 1: inside a bloc
        state = 0
        i = 0
        blocs = []
        blocs_sections.append(blocs)

        # bloc waiting for a c_next constraint to the next created bloc
        bloc_to_nlink = None
        # True when the current bloc may fall through to the next label
        block_may_link = False
        while i < len(lines):
            # no current bloc
            if state == 0:
                if not isinstance(lines[i], asm_label):
                    # a bloc has to start with a label: insert one and
                    # handle it on the next iteration
                    l = guess_next_new_label(symbol_pool)
                    lines[i:i] = [l]
                else:
                    l = lines[i]
                    b = asm_bloc(l)
                    b.bloc_num = bloc_num
                    bloc_num += 1
                    blocs.append(b)
                    state = 1
                    i += 1
                    if bloc_to_nlink:
                        bloc_to_nlink.addto(
                            asm_constraint(b.label, asm_constraint.c_next))
                        bloc_to_nlink = None

            # in bloc
            elif state == 1:
                # asm_raw
                if isinstance(lines[i], asm_raw):
                    if hasattr(lines[i], 'split'):
                        # .split marker: close the bloc, drop the marker
                        state = 0
                        block_may_link = False
                        i += 1
                    else:
                        # raw asm are link by default
                        state = 1
                        block_may_link = True
                        b.addline(lines[i])
                        i += 1
                # asm_label
                elif isinstance(lines[i], asm_label):
                    if block_may_link:
                        b.addto(
                            asm_constraint(lines[i], asm_constraint.c_next))
                        block_may_link = False
                    # the label itself is consumed in state 0
                    state = 0
                # instruction
                else:
                    b.addline(lines[i])
                    if lines[i].dstflow():
                        for x in lines[i].getdstflow(symbol_pool):
                            if not isinstance(x, m2_expr.ExprId):
                                continue
                            if x in mnemo.regs.all_regs_ids:
                                continue
                            b.addto(asm_constraint(x, asm_constraint.c_to))

                        # TODO XXX redo this really

                        if not lines[i].breakflow() and i + 1 < len(lines):
                            # make sure the next line is a label
                            if isinstance(lines[i + 1], asm_label):
                                l = lines[i + 1]
                            else:
                                l = guess_next_new_label(symbol_pool)
                                lines[i + 1:i + 1] = [l]
                        else:
                            state = 0

                        if lines[i].splitflow():
                            bloc_to_nlink = b
                    if not lines[i].breakflow() or lines[i].splitflow():
                        block_may_link = True
                    else:
                        block_may_link = False

                    i += 1

    for b in blocs_sections[0]:
        log_asmbloc.info(b)

    return blocs_sections, symbol_pool
def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
    """Parse an assembly listing and split it into blocs.

    @mnemo: architecture class; used through mnemo.fromstring(),
            mnemo.alignment and mnemo.regs.all_regs_ids
    @attrib: architecture attribute, forwarded to mnemo.fromstring()
    @txt: assembly source, one statement per line (';' starts a comment)
    @symbol_pool: (optional) asm_symbol_pool receiving the labels; a new
                  one is created when None
    @gen_label_index: unused, kept for interface compatibility

    Handled directives: .text/.data/.bss (section switch), .string, .ascii,
    .ustring (raw data), the entries of `declarator` (sized data), .split
    and .dontsplit (bloc splitting control), .align (alignment of the
    current bloc). .comm, .file, .intel_syntax, .globl, .local, .type,
    .size, .ident, .section and .cfi_* are ignored.

    Return (blocs_sections, symbol_pool), where blocs_sections holds one
    list of asm_bloc per section, in the order text, data, bss.
    Raise ValueError on an unknown directive.

    Note: string directives rely on str.decode('string_escape'), which
    only exists on Python 2.
    """
    if symbol_pool is None:
        symbol_pool = asmbloc.asm_symbol_pool()

    lines_text = []
    lines_data = []
    lines_bss = []

    C_NEXT = asmbloc.asm_constraint.c_next
    C_TO = asmbloc.asm_constraint.c_to

    # current section; statements are appended to it until a section
    # directive is met
    lines = lines_text
    # parse each line
    for line in txt.split('\n'):
        # empty
        if re.match(r'\s*$', line):
            continue
        # comment
        if re.match(r'\s*;\S*', line):
            continue
        # labels to forget
        r = re.match(r'\s*\.LF[BE]\d\s*:', line)
        if r:
            continue
        # label beginning with .L
        r = re.match(r'\s*(\.L\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue
        # directive
        if re.match(r'\s*\.', line):
            r = re.match(r'\s*\.(\S+)', line)
            directive = r.groups()[0]
            if directive == 'text':
                lines = lines_text
                continue
            if directive == 'data':
                lines = lines_data
                continue
            if directive == 'bss':
                lines = lines_bss
                continue
            if directive in ['string', 'ascii']:
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # The payload lies between the first and the last double
                # quote. Looking for a single quote as closing delimiter
                # only worked through the -1 "not found" result and
                # truncated strings containing an apostrophe.
                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                raw = raw.decode('string_escape')
                if directive == 'string':
                    raw += "\x00"
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive == 'ustring':
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # same delimiter handling as .string / .ascii
                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                raw = raw.decode('string_escape')
                # each character is followed by a NUL byte
                raw = "".join([string + '\x00' for string in raw])
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive in declarator:
                data_raw = line[r.end():].split(' ', 1)[1]
                data_raw = data_raw.split(',')
                size = declarator[directive]
                data_int = []

                # parser
                base_expr = gen_base_expr()[2]
                my_var_parser = parse_ast(
                    lambda x: m2_expr.ExprId(x, size),
                    lambda x: m2_expr.ExprInt_fromsize(size, x))
                base_expr.setParseAction(my_var_parser)

                for b in data_raw:
                    b = b.strip()
                    x = base_expr.parseString(b)[0]
                    data_int.append(x.canonize())

                raw = data_int
                x = asmbloc.asm_raw(raw)
                x.element_size = size
                lines.append(x)
                continue
            if directive == 'comm':
                # TODO
                continue
            if directive == 'split':  # custom command
                x = asmbloc.asm_raw()
                # marker tested with hasattr() while building the blocs
                x.split = True
                lines.append(x)
                continue
            if directive == 'dontsplit':  # custom command
                lines.append(asmbloc.asm_raw())
                continue
            if directive == "align":
                align_value = int(line[r.end():])
                lines.append(DirectiveAlign(align_value))
                continue
            # 'align' is handled just above, hence not listed here
            if directive in [
                    'file', 'intel_syntax', 'globl', 'local', 'type', 'size',
                    'ident', 'section'
            ]:
                continue
            if directive[0:4] == 'cfi_':
                continue

            raise ValueError("unknown directive %s" % str(directive))

        # label
        r = re.match(r'\s*(\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue

        # code
        if ';' in line:
            line = line[:line.find(';')]
        line = line.strip(' ').strip('\t')
        instr = mnemo.fromstring(line, attrib)

        # replace orphan asm_label with labels from symbol_pool
        replace_orphan_labels(instr, symbol_pool)

        if instr.dstflow():
            instr.dstflow2label(symbol_pool)
        lines.append(instr)

    asmbloc.log_asmbloc.info("___pre asm oki___")
    # make blocs

    blocs_sections = []
    bloc_num = 0
    b = None
    for lines in [lines_text, lines_data, lines_bss]:
        # state 0: no current bloc, state 1: inside a bloc
        state = 0
        i = 0
        blocs = []
        blocs_sections.append(blocs)

        # bloc waiting for a C_NEXT constraint to the next created bloc
        bloc_to_nlink = None
        # True when the current bloc may fall through to the next label
        block_may_link = False
        while i < len(lines):
            # no current bloc
            if state == 0:
                if not isinstance(lines[i], asmbloc.asm_label):
                    # a bloc has to start with a label: insert one and
                    # handle it on the next iteration
                    l = guess_next_new_label(symbol_pool)
                    lines[i:i] = [l]
                else:
                    l = lines[i]
                    b = asmbloc.asm_bloc(l, alignment=mnemo.alignment)
                    b.bloc_num = bloc_num
                    bloc_num += 1
                    blocs.append(b)
                    state = 1
                    i += 1
                    if bloc_to_nlink:
                        bloc_to_nlink.addto(
                            asmbloc.asm_constraint(b.label, C_NEXT))
                        bloc_to_nlink = None

            # in bloc
            elif state == 1:
                if isinstance(lines[i], asmbloc.asm_raw):
                    if hasattr(lines[i], 'split'):
                        # .split marker: close the bloc, drop the marker
                        state = 0
                        block_may_link = False
                        i += 1
                    else:
                        # raw asm are linked by default
                        state = 1
                        block_may_link = True
                        b.addline(lines[i])
                        i += 1
                elif isinstance(lines[i], DirectiveAlign):
                    # .align overrides the alignment of the current bloc
                    b.alignment = lines[i].alignment
                    i += 1
                # asmbloc.asm_label
                elif isinstance(lines[i], asmbloc.asm_label):
                    if block_may_link:
                        b.addto(asmbloc.asm_constraint(lines[i], C_NEXT))
                        block_may_link = False
                    # the label itself is consumed in state 0
                    state = 0
                # instruction
                else:
                    b.addline(lines[i])
                    if lines[i].dstflow():
                        for x in lines[i].getdstflow(symbol_pool):
                            if not isinstance(x, m2_expr.ExprId):
                                continue
                            if x in mnemo.regs.all_regs_ids:
                                continue
                            b.addto(asmbloc.asm_constraint(x, C_TO))

                        # TODO XXX redo this really

                        if not lines[i].breakflow() and i + 1 < len(lines):
                            # make sure the next line is a label
                            if isinstance(lines[i + 1], asmbloc.asm_label):
                                l = lines[i + 1]
                            else:
                                l = guess_next_new_label(symbol_pool)
                                lines[i + 1:i + 1] = [l]
                        else:
                            state = 0

                        if lines[i].splitflow():
                            bloc_to_nlink = b
                    if not lines[i].breakflow() or lines[i].splitflow():
                        block_may_link = True
                    else:
                        block_may_link = False

                    i += 1

    for block in blocs_sections[0]:
        asmbloc.log_asmbloc.info(block)

    return blocs_sections, symbol_pool
def my_ast_int2expr(a):
    """Wrap integer @a into an ExprInt.

    The bit size comes from the free variable `size`, which is not defined
    in this function.
    """
    int_expr = m2_expr.ExprInt_fromsize(size, a)
    return int_expr