def __init__(self, *args, **kwargs):
    """Initialise the jitter on top of an ir_x86_16 instance.

    Every positional and keyword argument is forwarded to jitter.__init__
    after the freshly built IR architecture object.
    """
    symbol_pool = asmbloc.asm_symbol_pool()
    ir_arch = ir_x86_16(symbol_pool)
    jitter.__init__(self, ir_arch, *args, **kwargs)
    self.vm.set_little_endian()
    self.ir_arch.do_stk_segm = False
    # Keep the IR's own register fixer reachable, then install this object's
    # ir_archbloc_fix_regs_for_mode in its place.
    self.orig_irbloc_fix_regs_for_mode = self.ir_arch.irbloc_fix_regs_for_mode
    self.ir_arch.irbloc_fix_regs_for_mode = self.ir_archbloc_fix_regs_for_mode
def arm_guess_jump_table(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis,
                         symbol_pool):
    """Guess the targets of a jump table used by cur_bloc and register them.

    The bloc is translated to IR; for each IR bloc whose PC is loaded from
    memory at an address matching `x + <integer>`, 32 bit words are read
    from the integer base until the read fails or a word lies too far from
    the base. Each collected word is treated as a destination.

    @mnemo: architecture, handed to get_ira
    @attrib: architecture attribute, handed to get_ira
    @pool_bin: byte source; must provide getbytes(offset, length)
    @cur_bloc: bloc to analyse; receives one asm_constraint_to per target
    @offsets_to_dis: collection (with add) extended with every target
    @symbol_pool: pool used to create the target labels

    Raises NotImplementedError when the address expression does not match
    `x + <integer>`.
    """
    ira = get_ira(mnemo, attrib)

    jra = ExprId('jra')
    jrb = ExprId('jrb')

    sp = asm_symbol_pool()
    ir_arch = ira(sp)
    ir_arch.add_bloc(cur_bloc)

    ir_blocs = ir_arch.blocs.values()
    for irb in ir_blocs:
        # Keep the last expression assigned to PC in this IR bloc
        pc_val = None
        for exprs in irb.irs:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
        if pc_val is None:
            continue
        # Only a PC fetched from memory can come from a table
        if not isinstance(pc_val, ExprMem):
            continue
        assert pc_val.size == 32
        print(pc_val)
        ad = pc_val.arg
        ad = expr_simp(ad)
        print(ad)
        res = MatchExpr(ad, jra + jrb, set([jra, jrb]))
        if res is False:
            raise NotImplementedError('not fully functional')
        print(res)
        if not isinstance(res[jrb], ExprInt):
            raise NotImplementedError('not fully functional')
        base_ad = int(res[jrb])
        print(base_ad)
        addrs = set()
        i = -1
        max_table_entry = 10000
        max_diff_addr = 0x100000  # heuristic
        while i < max_table_entry:
            i += 1
            try:
                ad = upck32(pool_bin.getbytes(base_ad + 4 * i, 4))
            except Exception:
                # Best effort: any read/unpack failure ends the table.
                # KeyboardInterrupt and SystemExit are no longer swallowed.
                break
            if abs(ad - base_ad) > max_diff_addr:
                break
            addrs.add(ad)
        print([hex(x) for x in addrs])

        for ad in addrs:
            offsets_to_dis.add(ad)
            l = symbol_pool.getby_offset_create(ad)
            c = asm_constraint_to(l)
            cur_bloc.addto(c)
def __init__(self, arch, attrib, symbol_pool=None):
    """Store the architecture, its attribute and the symbol pool.

    @arch: architecture object; must provide getpc and getsp
    @attrib: architecture attribute, passed to arch.getpc / arch.getsp
    @symbol_pool: (optional) pool to use; a new asm_symbol_pool is created
    when None is given
    """
    self.symbol_pool = (symbol_pool if symbol_pool is not None
                        else asm_symbol_pool())
    self.blocs = {}
    self.pc = arch.getpc(attrib)
    self.sp = arch.getsp(attrib)
    self.arch, self.attrib = arch, attrib
def __init__(self, *args, **kwargs):
    """Reset the instance attributes, then delegate to self.parse.

    All arguments are forwarded unchanged to parse.
    """
    # Assigned left to right, in the same order as separate statements
    self._executable = self._bin_stream = None
    self._entry_point = self._arch = None
    self._symbol_pool = asm_symbol_pool()

    # Launch parsing
    self.parse(*args, **kwargs)
def arm_guess_jump_table(
        mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool):
    """Guess and print the targets of a jump table used by cur_bloc.

    The bloc is translated to IR; for each IR bloc whose PC is loaded from
    memory at an address matching `x + <integer>`, 32 bit words are read
    from the integer base until the read fails or a word lies too far from
    the base. This variant only prints the collected addresses.

    @mnemo: architecture, handed to get_ira
    @attrib: architecture attribute, handed to get_ira
    @pool_bin: byte source; must provide getbytes(offset, length)
    @cur_bloc: bloc to analyse
    @offsets_to_dis: not used by this variant
    @symbol_pool: not used by this variant

    Raises NotImplementedError when the address expression does not match
    `x + <integer>`.
    """
    ira = get_ira(mnemo, attrib)

    jra = ExprId('jra')
    jrb = ExprId('jrb')

    sp = asm_symbol_pool()
    ir_arch = ira(sp)
    ir_arch.add_bloc(cur_bloc)

    ir_blocs = ir_arch.blocs.values()
    for irb in ir_blocs:
        # Keep the last expression assigned to PC in this IR bloc
        pc_val = None
        for exprs in irb.irs:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
        if pc_val is None:
            continue
        # Only a PC fetched from memory can come from a table
        if not isinstance(pc_val, ExprMem):
            continue
        assert pc_val.size == 32
        print(pc_val)
        ad = pc_val.arg
        ad = expr_simp(ad)
        print(ad)
        res = MatchExpr(ad, jra + jrb, set([jra, jrb]))
        if res is False:
            raise NotImplementedError('not fully functional')
        print(res)
        if not isinstance(res[jrb], ExprInt):
            raise NotImplementedError('not fully functional')
        base_ad = int(res[jrb])
        print(base_ad)
        addrs = set()
        i = -1
        max_table_entry = 10000
        max_diff_addr = 0x100000  # heuristic
        while i < max_table_entry:
            i += 1
            try:
                ad = upck32(pool_bin.getbytes(base_ad + 4 * i, 4))
            except Exception:
                # Best effort: any read/unpack failure ends the table.
                # KeyboardInterrupt and SystemExit are no longer swallowed.
                break
            if abs(ad - base_ad) > max_diff_addr:
                break
            addrs.add(ad)
        print([hex(x) for x in addrs])
def arm_guess_subcall(
        mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool):
    """Detect a call at the end of cur_bloc and link the return address.

    The bloc is translated to IR. When an IR bloc assigns both PC and LR,
    and LR receives an integer equal to the address following the last line
    of cur_bloc, that address is added to offsets_to_dis and an
    asm_constraint_next towards its label is attached to cur_bloc.

    @mnemo: architecture; handed to get_ira, and its regs.LR is looked up
    @attrib: architecture attribute, handed to get_ira
    @pool_bin: not used by this function
    @cur_bloc: bloc to analyse; receives the constraints
    @offsets_to_dis: collection (with add) extended with return addresses
    @symbol_pool: pool used to create the return address labels
    """
    ir_arch = get_ira(mnemo, attrib)(asm_symbol_pool())
    print('###')
    print(cur_bloc)
    ir_arch.add_bloc(cur_bloc)

    pending = set()
    for irbloc in ir_arch.blocs.values():
        new_pc = None
        new_lr = None
        # Remember the last value written to PC and to LR
        for assignments in irbloc.irs:
            for assign in assignments:
                if assign.dst == ir_arch.pc:
                    new_pc = assign.src
                if assign.dst == mnemo.regs.LR:
                    new_lr = assign.src

        if (new_pc is None or new_lr is None
                or not isinstance(new_lr, ExprInt)):
            continue

        # LR must point right after the last line of the bloc
        last_line = cur_bloc.lines[-1]
        if new_lr.arg != last_line.offset + last_line.l:
            continue

        return_label = symbol_pool.getby_offset_create(int(new_lr))
        pending.add(asm_constraint_next(return_label))
        offsets_to_dis.add(int(new_lr))

    for constraint in pending:
        cur_bloc.addto(constraint)
def arm_guess_subcall(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis,
                      symbol_pool):
    """Detect a call at the end of cur_bloc and link the return address.

    The bloc is translated to IR. When an IR bloc assigns both PC and LR,
    and LR receives an integer equal to the address following the last line
    of cur_bloc, that address is added to offsets_to_dis and an
    asm_constraint_next towards its label is attached to cur_bloc.

    @mnemo: architecture; handed to get_ira, and its regs.LR is looked up
    @attrib: architecture attribute, handed to get_ira
    @pool_bin: not used by this function
    @cur_bloc: bloc to analyse; receives the constraints
    @offsets_to_dis: collection (with add) extended with return addresses
    @symbol_pool: pool used to create the return address labels
    """
    ir_arch = get_ira(mnemo, attrib)(asm_symbol_pool())
    print('###')
    print(cur_bloc)
    ir_arch.add_bloc(cur_bloc)

    pending = set()
    for irbloc in ir_arch.blocs.values():
        new_pc = None
        new_lr = None
        # Remember the last value written to PC and to LR
        for assignments in irbloc.irs:
            for assign in assignments:
                if assign.dst == ir_arch.pc:
                    new_pc = assign.src
                if assign.dst == mnemo.regs.LR:
                    new_lr = assign.src

        if (new_pc is None or new_lr is None
                or not isinstance(new_lr, ExprInt)):
            continue

        # LR must point right after the last line of the bloc
        last_line = cur_bloc.lines[-1]
        if new_lr.arg != last_line.offset + last_line.l:
            continue

        return_label = symbol_pool.getby_offset_create(int(new_lr))
        pending.add(asm_constraint_next(return_label))
        offsets_to_dis.add(int(new_lr))

    for constraint in pending:
        cur_bloc.addto(constraint)
def __init__(self, *args, **kwargs):
    """Initialise the jitter on top of an ir_armb instance.

    Extra arguments are forwarded to jitter.__init__. The VM is switched to
    big endian and the IR's jit_pc is set to the architecture PC register.
    """
    symbol_pool = asmbloc.asm_symbol_pool()
    ir_arch = ir_armb(symbol_pool)
    jitter.__init__(self, ir_arch, *args, **kwargs)
    self.vm.set_big_endian()
    self.ir_arch.jit_pc = self.ir_arch.arch.regs.PC
def __init__(self, *args, **kwargs):
    """Initialise the jitter on top of an ir_arml instance.

    Extra arguments are forwarded to jitter.__init__; the VM is then
    switched to little endian.
    """
    symbol_pool = asmbloc.asm_symbol_pool()
    ir_arch = ir_arml(symbol_pool)
    jitter.__init__(self, ir_arch, *args, **kwargs)
    self.vm.set_little_endian()
def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
    """Parse an assembly listing into blocs, grouped by section.

    Returns a couple (blocs_sections, symbol_pool), where blocs_sections is a
    list of three lists of asm_bloc built from the lines collected after the
    .text, .data and .bss directives (in that order), and symbol_pool the
    associated asm_symbol_pool

    @mnemo: architecture used
    @attrib: architecture attribute
    @txt: assembly listing
    @symbol_pool: (optional) the asm_symbol_pool instance used to handle labels
    of the listing
    @gen_label_index: not used by this function

    Raises ValueError on an unknown directive.
    """
    if symbol_pool is None:
        symbol_pool = asmbloc.asm_symbol_pool()

    lines_text = []
    lines_data = []
    lines_bss = []

    C_NEXT = asmbloc.asm_constraint.c_next
    C_TO = asmbloc.asm_constraint.c_to

    # Lines go to the text section until a section directive says otherwise
    lines = lines_text
    # parse each line
    for line in txt.split('\n'):
        # empty
        if re.match(r'\s*$', line):
            continue
        # comment
        if re.match(r'\s*;\S*', line):
            continue
        # labels to forget
        r = re.match(r'\s*\.LF[BE]\d\s*:', line)
        if r:
            continue
        # label beginning with .L
        r = re.match(r'\s*(\.L\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue
        # directive
        if re.match(r'\s*\.', line):
            r = re.match(r'\s*\.(\S+)', line)
            directive = r.groups()[0]
            if directive == 'text':
                lines = lines_text
                continue
            if directive == 'data':
                lines = lines_data
                continue
            if directive == 'bss':
                lines = lines_bss
                continue
            if directive in ['string', 'ascii']:
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # The string ends at the last double quote. Looking for a
                # single quote only worked through the -1 returned by rfind
                # when the line ended on the closing quote, and truncated
                # any string holding an apostrophe.
                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                raw = raw.decode('string_escape')
                if directive == 'string':
                    raw += "\x00"
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive == 'ustring':
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # Same delimiter fix as for 'string' / 'ascii'
                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                raw = raw.decode('string_escape')
                # A NUL byte is appended after every character
                raw = "".join([string + '\x00' for string in raw])
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive in declarator:
                data_raw = line[r.end():].split(' ', 1)[1]
                data_raw = data_raw.split(',')
                size = declarator[directive]
                data_int = []

                # parser
                base_expr = gen_base_expr()[2]
                my_var_parser = parse_ast(
                    lambda x: m2_expr.ExprId(x, size),
                    lambda x: m2_expr.ExprInt_fromsize(size, x))
                base_expr.setParseAction(my_var_parser)

                for b in data_raw:
                    b = b.strip()
                    x = base_expr.parseString(b)[0]
                    data_int.append(x.canonize())

                raw = data_int
                x = asmbloc.asm_raw(raw)
                x.element_size = size
                lines.append(x)
                continue
            if directive == 'comm':
                # TODO
                continue
            if directive == 'split':  # custom command
                # An asm_raw carrying a 'split' attribute ends the bloc
                x = asmbloc.asm_raw()
                x.split = True
                lines.append(x)
                continue
            if directive == 'dontsplit':  # custom command
                lines.append(asmbloc.asm_raw())
                continue
            if directive == "align":
                align_value = int(line[r.end():])
                lines.append(DirectiveAlign(align_value))
                continue
            if directive in ['file', 'intel_syntax', 'globl', 'local',
                             'type', 'size', 'align', 'ident', 'section']:
                continue
            if directive[0:4] == 'cfi_':
                continue

            raise ValueError("unknown directive %s" % str(directive))

        # label
        r = re.match(r'\s*(\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue

        # code
        if ';' in line:
            line = line[:line.find(';')]
        line = line.strip(' ').strip('\t')
        instr = mnemo.fromstring(line, attrib)

        # replace orphan asm_label with labels from symbol_pool
        replace_orphan_labels(instr, symbol_pool)

        if instr.dstflow():
            instr.dstflow2label(symbol_pool)
        lines.append(instr)

    asmbloc.log_asmbloc.info("___pre asm oki___")
    # make blocs

    blocs_sections = []
    bloc_num = 0
    b = None
    for lines in [lines_text, lines_data, lines_bss]:
        # state 0: no current bloc; state 1: inside a bloc
        state = 0
        i = 0
        blocs = []
        blocs_sections.append(blocs)
        bloc_to_nlink = None
        block_may_link = False
        while i < len(lines):
            # no current bloc
            if state == 0:
                if not isinstance(lines[i], asmbloc.asm_label):
                    # A bloc must start on a label: insert a generated one
                    # and handle it on the next iteration
                    l = guess_next_new_label(symbol_pool)
                    lines[i:i] = [l]
                else:
                    l = lines[i]
                    b = asmbloc.asm_bloc(l, alignment=mnemo.alignment)
                    b.bloc_num = bloc_num
                    bloc_num += 1
                    blocs.append(b)
                    state = 1
                    i += 1
                    if bloc_to_nlink:
                        bloc_to_nlink.addto(
                            asmbloc.asm_constraint(b.label, C_NEXT))
                        bloc_to_nlink = None

            # in bloc
            elif state == 1:
                if isinstance(lines[i], asmbloc.asm_raw):
                    if hasattr(lines[i], 'split'):
                        state = 0
                        block_may_link = False
                        i += 1
                    else:
                        state = 1
                        block_may_link = True
                        b.addline(lines[i])
                        i += 1
                elif isinstance(lines[i], DirectiveAlign):
                    b.alignment = lines[i].alignment
                    i += 1
                # asmbloc.asm_label
                elif isinstance(lines[i], asmbloc.asm_label):
                    if block_may_link:
                        b.addto(
                            asmbloc.asm_constraint(lines[i], C_NEXT))
                        block_may_link = False
                    # The label is consumed by state 0 (i is not advanced)
                    state = 0
                # instruction
                else:
                    b.addline(lines[i])
                    # NOTE(review): the nesting of this branch was rebuilt
                    # from a flattened source; confirm that the bloc closing
                    # logic belongs under the dstflow() test.
                    if lines[i].dstflow():
                        for x in lines[i].getdstflow(symbol_pool):
                            if not isinstance(x, m2_expr.ExprId):
                                continue
                            if x in mnemo.regs.all_regs_ids:
                                continue
                            b.addto(asmbloc.asm_constraint(x, C_TO))

                        # TODO XXX redo this really

                        if not lines[i].breakflow() and i + 1 < len(lines):
                            if isinstance(lines[i + 1], asmbloc.asm_label):
                                l = lines[i + 1]
                            else:
                                l = guess_next_new_label(symbol_pool)
                                lines[i + 1:i + 1] = [l]
                        else:
                            state = 0

                        if lines[i].splitflow():
                            bloc_to_nlink = b
                    if not lines[i].breakflow() or lines[i].splitflow():
                        block_may_link = True
                    else:
                        block_may_link = False

                    i += 1

    # Only the blocs of the text section are logged
    for block in blocs_sections[0]:
        asmbloc.log_asmbloc.info(block)

    return blocs_sections, symbol_pool
def __init__(self, *args, **kwargs):
    """Initialise the jitter on top of an ir_aarch64b instance.

    Extra arguments are forwarded to jitter.__init__; the VM is then
    switched to big endian.
    """
    symbol_pool = asmbloc.asm_symbol_pool()
    ir_arch = ir_aarch64b(symbol_pool)
    jitter.__init__(self, ir_arch, *args, **kwargs)
    self.vm.set_big_endian()
def __init__(self, *args, **kwargs):
    """Initialise the jitter on top of an ir_msp430 instance.

    ir_msp430 is imported at call time. Extra arguments are forwarded to
    jitter.__init__. The VM is switched to little endian and the IR's
    jit_pc is set to the architecture PC register.
    """
    from miasm2.arch.msp430.sem import ir_msp430
    symbol_pool = asmbloc.asm_symbol_pool()
    ir_arch = ir_msp430(symbol_pool)
    jitter.__init__(self, ir_arch, *args, **kwargs)
    self.vm.set_little_endian()
    self.ir_arch.jit_pc = self.ir_arch.arch.regs.PC
def parse_txt(mnemo, attrib, txt, symbol_pool=None):
    """Parse an assembly listing. Returns a couple (blocks, symbol_pool), where
    blocks is an asmbloc.AsmCFG holding the asm_bloc built from the listing and
    symbol_pool the associated asm_symbol_pool

    @mnemo: architecture used
    @attrib: architecture attribute
    @txt: assembly listing
    @symbol_pool: (optional) the asm_symbol_pool instance used to handle labels
    of the listing

    Raises ValueError on an unknown directive, and RuntimeError when a
    flow-breaking instruction is met while a delay slot is pending or when a
    parsed line has an unexpected class.
    """

    if symbol_pool is None:
        symbol_pool = asmbloc.asm_symbol_pool()

    C_NEXT = asmbloc.asm_constraint.c_next
    C_TO = asmbloc.asm_constraint.c_to

    lines = []
    # parse each line
    for line in txt.split('\n'):
        # empty
        if EMPTY_RE.match(line):
            continue
        # comment
        if COMMENT_RE.match(line):
            continue
        # labels to forget
        if FORGET_LABEL_RE.match(line):
            continue
        # label beginning with .L
        match_re = LABEL_RE.match(line)
        if match_re:
            label_name = match_re.group(1)
            label = symbol_pool.getby_name_create(label_name)
            lines.append(label)
            continue
        # directive
        if DIRECTIVE_START_RE.match(line):
            match_re = DIRECTIVE_RE.match(line)
            directive = match_re.group(1)
            # Section directives are accepted but ignored: every line ends up
            # in the same list
            if directive in ['text', 'data', 'bss']:
                continue
            if directive in ['string', 'ascii']:
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # Keep what lies between the first and the last double quote
                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                raw = raw.decode('string_escape')
                if directive == 'string':
                    raw += "\x00"
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive == 'ustring':
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                raw = raw.decode('string_escape')
                # A NUL byte is appended after every character
                raw = "".join([string + '\x00' for string in raw])
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive in declarator:
                data_raw = line[match_re.end():].split(' ', 1)[1]
                data_raw = data_raw.split(',')
                size = declarator[directive]
                expr_list = []

                # parser
                base_expr = gen_base_expr()[2]
                my_var_parser = parse_ast(lambda x: m2_expr.ExprId(x, size),
                                          lambda x:
                                          m2_expr.ExprInt(x, size))
                base_expr.setParseAction(my_var_parser)

                for element in data_raw:
                    element = element.strip()
                    element_expr = base_expr.parseString(element)[0]
                    expr_list.append(element_expr.canonize())

                raw_data = asmbloc.asm_raw(expr_list)
                raw_data.element_size = size
                lines.append(raw_data)
                continue
            if directive == 'comm':
                # TODO
                continue
            if directive == 'split':  # custom command
                lines.append(DirectiveSplit())
                continue
            if directive == 'dontsplit':  # custom command
                lines.append(DirectiveDontSplit())
                continue
            if directive == "align":
                # Base 0: the prefix of the literal selects the radix
                align_value = int(line[match_re.end():], 0)
                lines.append(DirectiveAlign(align_value))
                continue
            if directive in ['file', 'intel_syntax', 'globl', 'local',
                             'type', 'size', 'align', 'ident', 'section']:
                continue
            if directive[0:4] == 'cfi_':
                continue

            raise ValueError("unknown directive %s" % str(directive))

        # label
        match_re = LABEL_RE.match(line)
        if match_re:
            label_name = match_re.group(1)
            label = symbol_pool.getby_name_create(label_name)
            lines.append(label)
            continue

        # code
        if ';' in line:
            line = line[:line.find(';')]
        line = line.strip(' ').strip('\t')
        instr = mnemo.fromstring(line, attrib)

        # replace orphan asm_label with labels from symbol_pool
        replace_orphan_labels(instr, symbol_pool)

        if instr.dstflow():
            instr.dstflow2label(symbol_pool)
        lines.append(instr)

    asmbloc.log_asmbloc.info("___pre asm oki___")
    # make blocks

    # NOTE(review): the nesting below was rebuilt from a flattened source;
    # confirm it against the reference implementation.
    cur_block = None
    state = STATE_NO_BLOC
    i = 0
    blocks = asmbloc.AsmCFG()
    # Block waiting for a C_NEXT constraint towards the next block, if any
    block_to_nlink = None
    # Loop iterations left before the current block gets closed
    delayslot = 0
    while i < len(lines):
        if delayslot:
            delayslot -= 1
            if delayslot == 0:
                state = STATE_NO_BLOC
        line = lines[i]
        # no current block
        if state == STATE_NO_BLOC:
            if isinstance(line, DirectiveDontSplit):
                block_to_nlink = cur_block
                i += 1
                continue
            elif isinstance(line, DirectiveSplit):
                block_to_nlink = None
                i += 1
                continue
            elif not isinstance(line, asmbloc.asm_label):
                # First line must be a label. If it's not the case, generate
                # it.
                label = guess_next_new_label(symbol_pool)
                cur_block = asmbloc.asm_bloc(label, alignment=mnemo.alignment)
            else:
                cur_block = asmbloc.asm_bloc(line, alignment=mnemo.alignment)
                i += 1
            # Generate the current bloc
            blocks.add_node(cur_block)
            state = STATE_IN_BLOC
            if block_to_nlink:
                block_to_nlink.addto(
                    asmbloc.asm_constraint(cur_block.label,
                                           C_NEXT))
            block_to_nlink = None
            continue

        # in block
        elif state == STATE_IN_BLOC:
            if isinstance(line, DirectiveSplit):
                state = STATE_NO_BLOC
                block_to_nlink = None
            elif isinstance(line, DirectiveDontSplit):
                state = STATE_NO_BLOC
                block_to_nlink = cur_block
            elif isinstance(line, DirectiveAlign):
                cur_block.alignment = line.alignment
            elif isinstance(line, asmbloc.asm_raw):
                cur_block.addline(line)
                block_to_nlink = cur_block
            elif isinstance(line, asmbloc.asm_label):
                if block_to_nlink:
                    cur_block.addto(
                        asmbloc.asm_constraint(line, C_NEXT))
                    block_to_nlink = None
                # i is not advanced: the label opens the next block
                state = STATE_NO_BLOC
                continue
            # instruction
            elif isinstance(line, instruction):
                cur_block.addline(line)
                block_to_nlink = cur_block
                if not line.breakflow():
                    i += 1
                    continue
                if delayslot:
                    raise RuntimeError("Cannot have breakflow in delayslot")
                if line.dstflow():
                    for dst in line.getdstflow(symbol_pool):
                        if not isinstance(dst, m2_expr.ExprId):
                            continue
                        if dst in mnemo.regs.all_regs_ids:
                            continue
                        cur_block.addto(asmbloc.asm_constraint(dst.name, C_TO))

                if not line.splitflow():
                    block_to_nlink = None

                # +1 because the counter is decremented at the top of the
                # next iteration
                delayslot = line.delayslot + 1
            else:
                raise RuntimeError("unknown class %s" % line.__class__)
        i += 1

    for block in blocks:
        # Fix multiple constraints
        block.fix_constraints()

        # Log block
        asmbloc.log_asmbloc.info(block)
    return blocks, symbol_pool
def __init__(self, *args, **kwargs):
    """Initialise the jitter on top of an ir_mips32b instance.

    Extra arguments are forwarded to jitter.__init__; the VM is then
    switched to big endian.
    """
    symbol_pool = asmbloc.asm_symbol_pool()
    ir_arch = ir_mips32b(symbol_pool)
    jitter.__init__(self, ir_arch, *args, **kwargs)
    self.vm.set_big_endian()
def parse_txt(mnemo, attrib, txt, symbol_pool=None):
    """Parse an assembly listing. Returns a couple (blocks, symbol_pool), where
    blocks is a list of asm_bloc and symbol_pool the associated
    asm_symbol_pool

    @mnemo: architecture used
    @attrib: architecture attribute
    @txt: assembly listing
    @symbol_pool: (optional) the asm_symbol_pool instance used to handle labels
    of the listing

    Raises ValueError on an unknown directive, and RuntimeError when a
    flow-breaking instruction is met while a delay slot is pending or when a
    parsed line has an unexpected class.
    """

    if symbol_pool is None:
        symbol_pool = asmbloc.asm_symbol_pool()

    C_NEXT = asmbloc.asm_constraint.c_next
    C_TO = asmbloc.asm_constraint.c_to

    lines = []
    # parse each line
    for line in txt.split('\n'):
        # empty
        if EMPTY_RE.match(line):
            continue
        # comment
        if COMMENT_RE.match(line):
            continue
        # labels to forget
        if FORGET_LABEL_RE.match(line):
            continue
        # label beginning with .L
        match_re = LABEL_RE.match(line)
        if match_re:
            label_name = match_re.group(1)
            label = symbol_pool.getby_name_create(label_name)
            lines.append(label)
            continue
        # directive
        if DIRECTIVE_START_RE.match(line):
            match_re = DIRECTIVE_RE.match(line)
            directive = match_re.group(1)
            # Section directives are accepted but ignored: every line ends up
            # in the same list
            if directive in ['text', 'data', 'bss']:
                continue
            if directive in ['string', 'ascii']:
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # Keep what lies between the first and the last double quote
                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                raw = raw.decode('string_escape')
                if directive == 'string':
                    raw += "\x00"
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive == 'ustring':
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                raw = raw.decode('string_escape')
                # A NUL byte is appended after every character
                raw = "".join([string + '\x00' for string in raw])
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive in declarator:
                data_raw = line[match_re.end():].split(' ', 1)[1]
                data_raw = data_raw.split(',')
                size = declarator[directive]
                expr_list = []

                # parser
                base_expr = gen_base_expr()[2]
                my_var_parser = parse_ast(lambda x: m2_expr.ExprId(x, size),
                                          lambda x: m2_expr.ExprInt(x, size))
                base_expr.setParseAction(my_var_parser)

                for element in data_raw:
                    element = element.strip()
                    element_expr = base_expr.parseString(element)[0]
                    expr_list.append(element_expr.canonize())

                raw_data = asmbloc.asm_raw(expr_list)
                raw_data.element_size = size
                lines.append(raw_data)
                continue
            if directive == 'comm':
                # TODO
                continue
            if directive == 'split':  # custom command
                lines.append(DirectiveSplit())
                continue
            if directive == 'dontsplit':  # custom command
                lines.append(DirectiveDontSplit())
                continue
            if directive == "align":
                # Base 0: the prefix of the literal selects the radix
                align_value = int(line[match_re.end():], 0)
                lines.append(DirectiveAlign(align_value))
                continue
            if directive in [
                    'file', 'intel_syntax', 'globl', 'local', 'type', 'size',
                    'align', 'ident', 'section'
            ]:
                continue
            if directive[0:4] == 'cfi_':
                continue

            raise ValueError("unknown directive %s" % str(directive))

        # label
        match_re = LABEL_RE.match(line)
        if match_re:
            label_name = match_re.group(1)
            label = symbol_pool.getby_name_create(label_name)
            lines.append(label)
            continue

        # code
        if ';' in line:
            line = line[:line.find(';')]
        line = line.strip(' ').strip('\t')
        instr = mnemo.fromstring(line, attrib)

        # replace orphan asm_label with labels from symbol_pool
        replace_orphan_labels(instr, symbol_pool)

        if instr.dstflow():
            instr.dstflow2label(symbol_pool)
        lines.append(instr)

    asmbloc.log_asmbloc.info("___pre asm oki___")
    # make blocks

    # NOTE(review): the nesting below was rebuilt from a flattened source;
    # confirm it against the reference implementation.
    cur_block = None
    state = STATE_NO_BLOC
    i = 0
    blocks = []
    # Block waiting for a C_NEXT constraint towards the next block, if any
    block_to_nlink = None
    # NOTE(review): block_may_link is never read in this function
    block_may_link = False
    # Loop iterations left before the current block gets closed
    delayslot = 0
    while i < len(lines):
        if delayslot:
            delayslot -= 1
            if delayslot == 0:
                state = STATE_NO_BLOC
        line = lines[i]
        # no current block
        if state == STATE_NO_BLOC:
            if isinstance(line, DirectiveDontSplit):
                block_to_nlink = cur_block
                i += 1
                continue
            elif isinstance(line, DirectiveSplit):
                block_to_nlink = None
                i += 1
                continue
            elif not isinstance(line, asmbloc.asm_label):
                # First line must be a label. If it's not the case, generate
                # it.
                label = guess_next_new_label(symbol_pool)
                cur_block = asmbloc.asm_bloc(label, alignment=mnemo.alignment)
            else:
                cur_block = asmbloc.asm_bloc(line, alignment=mnemo.alignment)
                i += 1
            # Generate the current bloc
            blocks.append(cur_block)
            state = STATE_IN_BLOC
            if block_to_nlink:
                block_to_nlink.addto(
                    asmbloc.asm_constraint(cur_block.label, C_NEXT))
            block_to_nlink = None
            continue

        # in block
        elif state == STATE_IN_BLOC:
            if isinstance(line, DirectiveSplit):
                state = STATE_NO_BLOC
                block_to_nlink = None
            elif isinstance(line, DirectiveDontSplit):
                state = STATE_NO_BLOC
                block_to_nlink = cur_block
            elif isinstance(line, DirectiveAlign):
                cur_block.alignment = line.alignment
            elif isinstance(line, asmbloc.asm_raw):
                cur_block.addline(line)
                block_to_nlink = cur_block
            elif isinstance(line, asmbloc.asm_label):
                if block_to_nlink:
                    cur_block.addto(asmbloc.asm_constraint(line, C_NEXT))
                    block_to_nlink = None
                # i is not advanced: the label opens the next block
                state = STATE_NO_BLOC
                continue
            # instruction
            elif isinstance(line, instruction):
                cur_block.addline(line)
                block_to_nlink = cur_block
                if not line.breakflow():
                    i += 1
                    continue
                if delayslot:
                    raise RuntimeError("Cannot have breakflow in delayslot")
                if line.dstflow():
                    for dst in line.getdstflow(symbol_pool):
                        if not isinstance(dst, m2_expr.ExprId):
                            continue
                        if dst in mnemo.regs.all_regs_ids:
                            continue
                        cur_block.addto(asmbloc.asm_constraint(dst.name, C_TO))

                if not line.splitflow():
                    block_to_nlink = None

                # +1 because the counter is decremented at the top of the
                # next iteration
                delayslot = line.delayslot + 1
            else:
                raise RuntimeError("unknown class %s" % line.__class__)
        i += 1

    for block in blocks:
        # Fix multiple constraints
        block.fix_constraints()

        # Log block
        asmbloc.log_asmbloc.info(block)
    return blocks, symbol_pool
def __init__(self, *args, **kwargs):
    """Initialise the jitter on top of an ir_msp430 instance.

    Extra arguments are forwarded to jitter.__init__; the VM is then
    switched to little endian.
    """
    symbol_pool = asmbloc.asm_symbol_pool()
    ir_arch = ir_msp430(symbol_pool)
    jitter.__init__(self, ir_arch, *args, **kwargs)
    self.vm.set_little_endian()
def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
    """Parse an assembly listing into blocs, grouped by section.

    Returns a couple (blocs_sections, symbol_pool), where blocs_sections is a
    list of three lists of asm_bloc built from the lines collected after the
    .text, .data and .bss directives (in that order), and symbol_pool the
    associated asm_symbol_pool

    @mnemo: architecture used
    @attrib: architecture attribute
    @txt: assembly listing
    @symbol_pool: (optional) the asm_symbol_pool instance used to handle labels
    of the listing
    @gen_label_index: not used by this function

    Raises ValueError on an unknown directive.
    """
    if symbol_pool is None:
        symbol_pool = asmbloc.asm_symbol_pool()

    lines_text = []
    lines_data = []
    lines_bss = []

    C_NEXT = asmbloc.asm_constraint.c_next
    C_TO = asmbloc.asm_constraint.c_to

    # Lines go to the text section until a section directive says otherwise
    lines = lines_text
    # parse each line
    for line in txt.split('\n'):
        # empty
        if re.match(r'\s*$', line):
            continue
        # comment
        if re.match(r'\s*;\S*', line):
            continue
        # labels to forget
        r = re.match(r'\s*\.LF[BE]\d\s*:', line)
        if r:
            continue
        # label beginning with .L
        r = re.match(r'\s*(\.L\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue
        # directive
        if re.match(r'\s*\.', line):
            r = re.match(r'\s*\.(\S+)', line)
            directive = r.groups()[0]
            if directive == 'text':
                lines = lines_text
                continue
            if directive == 'data':
                lines = lines_data
                continue
            if directive == 'bss':
                lines = lines_bss
                continue
            if directive in ['string', 'ascii']:
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # The string ends at the last double quote. Looking for a
                # single quote only worked through the -1 returned by rfind
                # when the line ended on the closing quote, and truncated
                # any string holding an apostrophe.
                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                raw = raw.decode('string_escape')
                if directive == 'string':
                    raw += "\x00"
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive == 'ustring':
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # Same delimiter fix as for 'string' / 'ascii'
                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                raw = raw.decode('string_escape')
                # A NUL byte is appended after every character
                raw = "".join([string + '\x00' for string in raw])
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive in declarator:
                data_raw = line[r.end():].split(' ', 1)[1]
                data_raw = data_raw.split(',')
                size = declarator[directive]
                data_int = []

                # parser
                base_expr = gen_base_expr()[2]
                my_var_parser = parse_ast(
                    lambda x: m2_expr.ExprId(x, size),
                    lambda x: m2_expr.ExprInt_fromsize(size, x))
                base_expr.setParseAction(my_var_parser)

                for b in data_raw:
                    b = b.strip()
                    x = base_expr.parseString(b)[0]
                    data_int.append(x.canonize())

                raw = data_int
                x = asmbloc.asm_raw(raw)
                x.element_size = size
                lines.append(x)
                continue
            if directive == 'comm':
                # TODO
                continue
            if directive == 'split':  # custom command
                # An asm_raw carrying a 'split' attribute ends the bloc
                x = asmbloc.asm_raw()
                x.split = True
                lines.append(x)
                continue
            if directive == 'dontsplit':  # custom command
                lines.append(asmbloc.asm_raw())
                continue
            if directive == "align":
                align_value = int(line[r.end():])
                lines.append(DirectiveAlign(align_value))
                continue
            if directive in [
                    'file', 'intel_syntax', 'globl', 'local', 'type', 'size',
                    'align', 'ident', 'section'
            ]:
                continue
            if directive[0:4] == 'cfi_':
                continue

            raise ValueError("unknown directive %s" % str(directive))

        # label
        r = re.match(r'\s*(\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue

        # code
        if ';' in line:
            line = line[:line.find(';')]
        line = line.strip(' ').strip('\t')
        instr = mnemo.fromstring(line, attrib)

        # replace orphan asm_label with labels from symbol_pool
        replace_orphan_labels(instr, symbol_pool)

        if instr.dstflow():
            instr.dstflow2label(symbol_pool)
        lines.append(instr)

    asmbloc.log_asmbloc.info("___pre asm oki___")
    # make blocs

    blocs_sections = []
    bloc_num = 0
    b = None
    for lines in [lines_text, lines_data, lines_bss]:
        # state 0: no current bloc; state 1: inside a bloc
        state = 0
        i = 0
        blocs = []
        blocs_sections.append(blocs)
        bloc_to_nlink = None
        block_may_link = False
        while i < len(lines):
            # no current bloc
            if state == 0:
                if not isinstance(lines[i], asmbloc.asm_label):
                    # A bloc must start on a label: insert a generated one
                    # and handle it on the next iteration
                    l = guess_next_new_label(symbol_pool)
                    lines[i:i] = [l]
                else:
                    l = lines[i]
                    b = asmbloc.asm_bloc(l, alignment=mnemo.alignment)
                    b.bloc_num = bloc_num
                    bloc_num += 1
                    blocs.append(b)
                    state = 1
                    i += 1
                    if bloc_to_nlink:
                        bloc_to_nlink.addto(
                            asmbloc.asm_constraint(b.label, C_NEXT))
                        bloc_to_nlink = None

            # in bloc
            elif state == 1:
                if isinstance(lines[i], asmbloc.asm_raw):
                    if hasattr(lines[i], 'split'):
                        state = 0
                        block_may_link = False
                        i += 1
                    else:
                        state = 1
                        block_may_link = True
                        b.addline(lines[i])
                        i += 1
                elif isinstance(lines[i], DirectiveAlign):
                    b.alignment = lines[i].alignment
                    i += 1
                # asmbloc.asm_label
                elif isinstance(lines[i], asmbloc.asm_label):
                    if block_may_link:
                        b.addto(asmbloc.asm_constraint(lines[i], C_NEXT))
                        block_may_link = False
                    # The label is consumed by state 0 (i is not advanced)
                    state = 0
                # instruction
                else:
                    b.addline(lines[i])
                    # NOTE(review): the nesting of this branch was rebuilt
                    # from a flattened source; confirm that the bloc closing
                    # logic belongs under the dstflow() test.
                    if lines[i].dstflow():
                        for x in lines[i].getdstflow(symbol_pool):
                            if not isinstance(x, m2_expr.ExprId):
                                continue
                            if x in mnemo.regs.all_regs_ids:
                                continue
                            b.addto(asmbloc.asm_constraint(x, C_TO))

                        # TODO XXX redo this really

                        if not lines[i].breakflow() and i + 1 < len(lines):
                            if isinstance(lines[i + 1], asmbloc.asm_label):
                                l = lines[i + 1]
                            else:
                                l = guess_next_new_label(symbol_pool)
                                lines[i + 1:i + 1] = [l]
                        else:
                            state = 0

                        if lines[i].splitflow():
                            bloc_to_nlink = b
                    if not lines[i].breakflow() or lines[i].splitflow():
                        block_may_link = True
                    else:
                        block_may_link = False

                    i += 1

    # Only the blocs of the text section are logged
    for block in blocs_sections[0]:
        asmbloc.log_asmbloc.info(block)

    return blocs_sections, symbol_pool