def check_cfg(fn: Callable, cfg: ControlFlowGraph):
    """Assert that ``cfg`` resolves every instruction offset of ``fn``.

    Walks the bytecode of ``fn`` once, tracking which block each offset is
    expected to belong to, and checks three things:

    * ``cfg.get_node(offset)`` equals the expected block for every offset;
    * afterwards every instruction offset is a key of ``cfg.offset2block``;
    * ``cfg.offset2block`` grew by exactly the number of offsets that were
      not equal to their block's ``bb.end_offset``.
    """
    code_bytes = fn.__code__.co_code

    # Expected block bookkeeping; the walk starts in the block at offset 0.
    block_map = cfg.offset2block
    expected_block = block_map[0]
    block_end = expected_block.bb.end_offset
    entries_before = len(block_map)
    entries_added = 0
    switch_block = False

    for instruction in get_instructions_bytes(code_bytes, opc):
        position = instruction.offset

        # The previous instruction closed its block, so this offset opens
        # the next one.
        if switch_block:
            expected_block = block_map[position]
            block_end = expected_block.bb.end_offset

        switch_block = position == block_end
        if not switch_block:
            # Counted as one more entry the get_node() call below is
            # expected to add to offset2block.
            entries_added += 1

        assert expected_block == cfg.get_node(position)

    # Having asked for every offset above, each of them must now be
    # present in offset2block.
    assert all(
        instruction.offset in block_map
        for instruction in get_instructions_bytes(code_bytes, opc)
    )

    # Every entry was either there from the start or added by the lookups.
    assert len(block_map) == entries_before + entries_added
def nop_jump_junk(co, opc):
    """Rewrite ``co.co_code`` without the anti-disassembly jumps and junk.

    Every byte before the first instruction's ``argval`` is replaced by a
    NOP opcode. If the ``\\r\\n\\r\\n========`` text header is found in the
    bytecode, everything from one JUMP_FORWARD instruction before that
    header to the end is dropped. All other bytes are copied unchanged.

    The result is stored back on ``co`` as a ``bytearray`` and
    ``co.freeze()`` is called; nothing is returned.
    """
    raw = co.co_code

    # The argval of the very first instruction is the first jump target.
    leading_inst = next(get_instructions_bytes(raw, opc))
    keep_from = leading_inst.argval

    # The text header marks where the second jump's junk payload begins.
    header_at = raw.find(b'\r\n\r\n========')
    header_found = header_at != -1

    # The JUMP_FORWARD that skips the header sits immediately before it.
    jump_size = instruction_size(opc.opmap['JUMP_FORWARD'], opc)
    cut_from = header_at - jump_size

    patched = bytearray()
    for position, byte in enumerate(raw):
        if position < keep_from:
            # Inside the first anti-disassembly jump: blank it out.
            patched.append(opc.opmap['NOP'])
        elif not (header_found and position >= cut_from):
            # Ordinary code: keep it. Bytes at or past the second jump
            # fall through both branches and are discarded.
            patched.append(byte)

    co.co_code = patched
    co.freeze()
def recursive_fix_stage3(co2):
    """Clean up a pyprotect code object and, recursively, its nested ones.

    Steps, in order:

    1. Coerce ``co2.co_name`` to ``str``.
    2. Replace the jump+junk anti-disassembler bytes with NOPs
       (``nop_jump_junk``), then strip the NOPs and recompute offsets
       (``remove_nops``).
    3. For every ``LOAD_CONST`` / ``MAKE_FUNCTION`` / ``STORE_NAME``
       instruction triple, set ``co_name`` of the loaded constant to the
       name it is stored under.
    4. Recurse into every code object in ``co_consts`` whose
       ``co_filename`` contains ``'pyprotect'``.

    Returns the rebuilt code object (the one produced by ``remove_nops``),
    after calling ``freeze()`` on it.
    """
    co2.co_name = str(co2.co_name)

    # Replace the jump+junk anti-disassembler instructions with NOP.
    nop_jump_junk(co2, opc)

    # Remove the NOP's and recalculate the offsets.
    co2 = remove_nops(co2, opc, version)

    insts = list(get_instructions_bytes(co2, opc))
    # Slide a three-instruction window over the stream. zip() stops at the
    # shortest view, so the window can never index past the last
    # instruction (the indexed form raised IndexError on a trailing
    # LOAD_CONST / MAKE_FUNCTION).
    for load, make, store in zip(insts, insts[1:], insts[2:]):
        if (load.opname == 'LOAD_CONST'
                and make.opname == 'MAKE_FUNCTION'
                and store.opname == 'STORE_NAME'):
            co2.co_consts[load.arg].co_name = str(co2.co_names[store.arg])

    # Recursively fix up the code objects in `co_consts`
    consts = list(co2.co_consts)
    for index, const in enumerate(consts):
        if xdis.code.iscode(const) and 'pyprotect' in const.co_filename:
            consts[index] = recursive_fix_stage3(const)
    co2.co_consts = tuple(consts)

    co2.freeze()
    return co2
def _populate_lines(self):
    """Fill ``self.offsets``, ``self.instructions`` and ``self.lines``.

    Disassembles ``self.code`` and records every instruction and its
    offset. Consecutive instructions are grouped into one ``LineOffsets``
    entry per instruction that starts a source line; each entry is
    appended to ``self.lines`` once the next line starts (or the bytecode
    ends).

    When ``self.include_children`` is true, a ``LineOffsetInfo`` is built
    for every code object in ``co_consts``, stored in ``self.children``
    under its ``name``, and its lines are appended to ``self.lines``.
    """
    code = self.code
    current_line = None
    for instr in get_instructions_bytes(
        bytecode=code.co_code,
        opc=self.opc,
        varnames=code.co_varnames,
        names=code.co_names,
        constants=code.co_consts,
        cells=code.co_cellvars + code.co_freevars,
        linestarts=self.linestarts,
    ):
        offset = instr.offset
        self.offsets.append(offset)
        self.instructions.append(instr)

        if instr.starts_line:
            # A new source line begins: flush the one being collected.
            if current_line:
                self.lines.append(current_line)
            current_line = LineOffsets(instr.starts_line, [offset], code)
        elif current_line is not None:
            current_line.offsets.append(offset)
        # Otherwise no line has started yet, so there is no entry to attach
        # this offset to; it is still recorded in self.offsets above.

    # Flush the final line. Skipped when no instruction ever started a
    # line, so that None is never stored in self.lines.
    if current_line is not None:
        self.lines.append(current_line)

    if self.include_children:
        for const in code.co_consts:
            if iscode(const):
                child_info = LineOffsetInfo(self.opc, const, True)
                self.children[child_info.name] = child_info
                self.lines += child_info.lines
def remove_nops(co, opc, version):
    """Reassemble ``co`` with every NOP instruction removed.

    The instructions of ``co`` are disassembled, NOPs are skipped, and the
    rest are re-emitted through an ``Assembler`` at recomputed offsets.
    The argument of an absolute jump (``optype == 'jabs'``) is rewritten so
    that its distance from the jump instruction itself is unchanged.

    Note that ``co`` is attached to the assembler and its ``instructions``
    attribute is reset, so the object passed in is modified.

    Args:
        co: code object to process.
        opc: opcode module used to decode ``co``.
        version: bytecode version; passed to ``Assembler`` as ``str``.

    Returns:
        The code object built by ``create_code``.
    """
    asm = Assembler(str(version))
    asm.code = co
    asm.code.instructions = []

    # Disassemble the original instructions,
    # ignore if NOP, recalculate if absolute jump
    # then append them to our assembler.
    cur_offset = 0
    for inst in get_instructions_bytes(asm.code, opc):
        # Compare by value: identity (`is`) on string literals only works
        # by accident of interning.
        if inst.opname == 'NOP':
            continue

        # Recalculate absolute jump arg: keep the same distance between
        # the jump and its target, measured from the jump's new offset.
        arg = inst.arg
        if inst.optype == 'jabs':
            arg = cur_offset + (inst.arg - inst.offset)

        # Create a new `xasm` Instruction.
        new_inst = Instruction()
        new_inst.opcode = inst.opcode
        new_inst.arg = arg
        new_inst.offset = cur_offset
        new_inst.line_no = 0

        # Add the instruction to the assembler.
        asm.code.instructions.append(new_inst)
        cur_offset += inst.inst_size

    code = create_code(asm, [], [])

    # HACK/FIX: xasm's `create_code` makes co_code a str on opcode version < 3,
    # when it should _probably_ be checking the interpreter version instead.
    # uncompyle6 requires this to be bytes-like, so we convert it.
    # Only convert when it really is text; bytes are already what we need
    # and ord() would fail on their int elements.
    if sys.version_info > (3, 0, 0) and isinstance(code.co_code, str):
        code.co_code = bytes([ord(c) for c in code.co_code])

    return code
def disassemble_bytes(orig_msg, orig_msg_nocr, code, lasti=-1, cur_line=0,
                      start_line=-1, end_line=None, relative_pos=False,
                      varnames=(), names=(), constants=(), cells=(),
                      freevars=(), linestarts={}, highlight='light',
                      start_offset=0, end_offset=None):
    """Disassemble byte string of code.

    One row is written per instruction: line number (only on the
    instruction that starts a line), a ``-->`` marker when the offset
    equals ``lasti``, a ``>>`` marker when the offset is a jump label, the
    offset, the opcode name, the raw argument and the argument's repr.

    Args:
        orig_msg: callable that writes a string and ends the line.
        orig_msg_nocr: callable that writes a string without ending the line.
        code: bytecode to disassemble.
        lasti: offset to flag with ``-->``.
        cur_line: line number assumed before the first instruction.
        start_line: output is suppressed for lines before this one.
        end_line: disassembly stops once a line after this one starts;
            ``None`` means 10000.
        relative_pos: if true, ``end_line`` is taken relative to
            ``start_line`` (``end_line + start_line - 1``).
        varnames, names, constants, cells, linestarts: passed through to
            ``get_instructions_bytes``. ``linestarts`` is never mutated here.
        freevars: accepted for interface compatibility; not used here.
        highlight: passed to ``format_token``.
        start_offset: output is suppressed before this offset when nonzero.
        end_offset: disassembly stops after this offset when set.

    Returns:
        ``(code, offset)`` where ``offset`` is that of the last instruction
        fetched, or 0 if ``code`` contains no instructions.
    """
    # Gutter widths: blank placeholders must be exactly as wide as the
    # tokens they stand in for, or the columns drift out of alignment.
    line_number_width = 3
    arrow = '-->'
    label_mark = '>>'

    if end_line is None:
        end_line = 10000
    elif relative_pos:
        end_line += start_line - 1

    labels = findlabels(code)

    def null_print(x):
        return None

    if start_line > cur_line:
        msg_nocr = null_print
        msg = null_print
    else:
        msg_nocr = orig_msg_nocr
        msg = orig_msg

    # Bound up front so the return below is defined for empty bytecode.
    offset = 0
    for instr in get_instructions_bytes(code, opc, varnames, names,
                                        constants, cells, linestarts):
        offset = instr.offset
        if end_offset and offset > end_offset:
            break

        if instr.starts_line:
            # Blank line between source lines, except before the first.
            if offset:
                msg("")

            cur_line = instr.starts_line
            if start_line and (start_line > cur_line
                               or (start_offset and start_offset > offset)):
                msg_nocr = null_print
                msg = null_print
            else:
                msg_nocr = orig_msg_nocr
                msg = orig_msg

            # end_offset was already checked at the top of the loop.
            if cur_line > end_line:
                break
            msg_nocr(format_token(Mformat.LineNumber,
                                  "%3d" % cur_line,
                                  highlight=highlight))
        else:
            # Output may also switch on in the middle of a line once
            # start_offset has been reached.
            if start_offset and offset and start_offset <= offset:
                msg_nocr = orig_msg_nocr
                msg = orig_msg
            msg_nocr(' ' * line_number_width)

        if offset == lasti:
            msg_nocr(format_token(Mformat.Arrow, arrow,
                                  highlight=highlight))
        else:
            msg_nocr(' ' * len(arrow))

        if offset in labels:
            msg_nocr(format_token(Mformat.Arrow, label_mark,
                                  highlight=highlight))
        else:
            msg_nocr(' ' * len(label_mark))

        msg_nocr(repr(offset).rjust(4))
        msg_nocr(' ')
        msg_nocr(format_token(Mformat.Opcode,
                              instr.opname.ljust(20),
                              highlight=highlight))
        msg_nocr(repr(instr.arg).ljust(10))
        msg_nocr(' ')
        # Show argval?
        msg(format_token(Mformat.Name,
                         instr.argrepr.ljust(20),
                         highlight=highlight))

    return code, offset
def disassemble_bytes(orig_msg, orig_msg_nocr, code, lasti=-1, cur_line=0,
                      start_line=-1, end_line=None, relative_pos=False,
                      varnames=(), names=(), constants=(), cells=(),
                      freevars=(), linestarts={}, highlight='light',
                      start_offset=0, end_offset=None):
    """Disassemble byte string of code.

    One row is written per instruction: line number (only on the
    instruction that starts a line), a ``-->`` marker when the offset
    equals ``lasti``, a ``>>`` marker when the offset is a jump label, the
    offset, the opcode name, the raw argument and the argument's repr.

    Args:
        orig_msg: callable that writes a string and ends the line.
        orig_msg_nocr: callable that writes a string without ending the line.
        code: bytecode to disassemble.
        lasti: offset to flag with ``-->``.
        cur_line: line number assumed before the first instruction.
        start_line: output is suppressed for lines before this one.
        end_line: disassembly stops once a line after this one starts;
            ``None`` means 10000.
        relative_pos: if true, ``end_line`` is taken relative to
            ``start_line`` (``end_line + start_line - 1``).
        varnames, names, constants, cells, linestarts: passed through to
            ``get_instructions_bytes``. ``linestarts`` is never mutated here.
        freevars: accepted for interface compatibility; not used here.
        highlight: passed to ``format_token``.
        start_offset: output is suppressed before this offset when nonzero.
        end_offset: disassembly stops after this offset when set.

    Returns:
        ``(code, offset)`` where ``offset`` is that of the last instruction
        fetched, or 0 if ``code`` contains no instructions.
    """
    # Gutter widths: blank placeholders must be exactly as wide as the
    # tokens they stand in for, or the columns drift out of alignment.
    line_number_width = 4
    arrow = '-->'
    label_mark = '>>'

    if end_line is None:
        end_line = 10000
    elif relative_pos:
        end_line += start_line - 1

    labels = findlabels(code)

    def null_print(x):
        return None

    if start_line > cur_line:
        msg_nocr = null_print
        msg = null_print
    else:
        msg_nocr = orig_msg_nocr
        msg = orig_msg

    # Bound up front so the return below is defined for empty bytecode.
    offset = 0
    for instr in get_instructions_bytes(code, opc, varnames, names,
                                        constants, cells, linestarts):
        offset = instr.offset
        if end_offset and offset > end_offset:
            break

        if instr.starts_line:
            # Blank line between source lines, except before the first.
            if offset:
                msg("")

            cur_line = instr.starts_line
            if start_line and (start_line > cur_line
                               or (start_offset and start_offset > offset)):
                msg_nocr = null_print
                msg = null_print
            else:
                msg_nocr = orig_msg_nocr
                msg = orig_msg

            # end_offset was already checked at the top of the loop.
            if cur_line > end_line:
                break
            msg_nocr(format_token(Mformat.LineNumber,
                                  "%4d" % cur_line,
                                  highlight=highlight))
        else:
            # Output may also switch on in the middle of a line once
            # start_offset has been reached.
            if start_offset and offset and start_offset <= offset:
                msg_nocr = orig_msg_nocr
                msg = orig_msg
            msg_nocr(' ' * line_number_width)

        if offset == lasti:
            msg_nocr(format_token(Mformat.Arrow, arrow,
                                  highlight=highlight))
        else:
            msg_nocr(' ' * len(arrow))

        if offset in labels:
            msg_nocr(format_token(Mformat.Arrow, label_mark,
                                  highlight=highlight))
        else:
            msg_nocr(' ' * len(label_mark))

        msg_nocr(repr(offset).rjust(4))
        msg_nocr(' ')
        msg_nocr(format_token(Mformat.Opcode,
                              instr.opname.ljust(20),
                              highlight=highlight))
        msg_nocr(repr(instr.arg).ljust(10))
        msg_nocr(' ')
        # Show argval?
        msg(format_token(Mformat.Name,
                         instr.argrepr.ljust(20),
                         highlight=highlight))

    return code, offset