Beispiel #1
0
def check_cfg(fn: Callable, cfg: ControlFlowGraph):
    """Assert that ``cfg`` resolves every instruction offset of ``fn``.

    The bytecode of ``fn`` is walked once. The block each instruction is
    expected to live in is tracked by following the ``bb.end_offset`` of
    the blocks found in ``cfg.offset2block``, and ``cfg.get_node`` must
    return that same block for the instruction's offset.

    After the walk, ``cfg.offset2block`` must contain every instruction
    offset, and its size must equal its initial size plus the number of
    instructions that did not end a block.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    code_bytes = fn.__code__.co_code
    block_cache = cfg.offset2block

    # Start in the block registered for offset 0.
    block = block_cache[0]
    last_offset = block.bb.end_offset

    initial_cache_size = len(block_cache)
    expected_new_entries = 0
    at_block_end = False

    for instruction in get_instructions_bytes(code_bytes, opc):
        position = instruction.offset

        # The previous instruction closed its block, so this offset must
        # already be registered as the start of the next one.
        if at_block_end:
            block = block_cache[position]
            last_offset = block.bb.end_offset

        at_block_end = position == last_offset
        if not at_block_end:
            # Count one more entry expected in the cache once
            # cfg.get_node has been asked about the offsets.
            expected_new_entries += 1

        assert block == cfg.get_node(position)

    # Asking for each offset above must have populated the cache for
    # every instruction offset in the bytecode.
    assert all((instruction.offset in block_cache
                for instruction in get_instructions_bytes(code_bytes, opc)))

    # Every offset was either there originally or was added to the cache.
    assert len(block_cache) == initial_cache_size + expected_new_entries
Beispiel #2
0
def nop_jump_junk(co, opc):
    """Neutralise the anti-disassembly jumps in ``co.co_code`` in place.

    Every byte before the target of the first instruction (taken from its
    ``argval``) is replaced with the NOP opcode. If the marker
    ``b'\\r\\n\\r\\n========'`` is found in the bytecode, the JUMP_FORWARD
    instruction directly in front of it and everything after it are
    dropped. The result is stored back on ``co.co_code`` and ``co.freeze()``
    is called.

    Args:
        co: code object whose ``co_code`` is rewritten.
        opc: opcode module providing ``opmap``.
    """
    original = co.co_code

    # Target of the first instruction: everything before it is junk.
    leading_inst = next(get_instructions_bytes(original, opc))
    junk_end = leading_inst.argval

    # The text header marks where the second jump sits.
    header_pos = original.find(b'\r\n\r\n========')
    header_found = header_pos != -1

    # The second jump is the JUMP_FORWARD immediately before the header.
    # This value is only consulted when the header was actually found.
    cut_from = header_pos - instruction_size(opc.opmap['JUMP_FORWARD'], opc)

    patched = bytearray()
    for index, byte in enumerate(original):
        if index < junk_end:
            # Blank out the first anti-disassembly jump and its junk.
            patched.append(opc.opmap['NOP'])
            continue
        if header_found and index >= cut_from:
            # Drop the second jump and whatever follows it.
            continue
        patched.append(byte)

    # Install the patched bytecode.
    co.co_code = patched
    co.freeze()
Beispiel #3
0
def recursive_fix_stage3(co2):
    """Clean up ``co2`` and, recursively, the code objects in its constants.

    Steps, in order:

    1. Coerce ``co2.co_name`` to ``str``.
    2. Replace the jump+junk anti-disassembler bytes with NOPs
       (``nop_jump_junk``) and strip those NOPs (``remove_nops``).
    3. For every ``LOAD_CONST`` / ``MAKE_FUNCTION`` / ``STORE_NAME``
       triple, rename the loaded constant's ``co_name`` to the name it is
       stored under.
    4. Recurse into every code-object constant whose ``co_filename``
       contains ``'pyprotect'``.

    Uses the module-level ``opc`` and ``version``.

    Args:
        co2: the code object to fix.

    Returns:
        The rebuilt code object (the result of ``remove_nops``, further
        patched and frozen).
    """
    co2.co_name = str(co2.co_name)

    # Replace the jump+junk anti-disassembler instructions with NOP.
    nop_jump_junk(co2, opc)

    # Remove the NOPs and recalculate the offsets.
    co2 = remove_nops(co2, opc, version)

    insts = list(get_instructions_bytes(co2, opc))
    # Slide a three-instruction window over the stream. zip() stops at the
    # shortest input, so the window never reaches past the last
    # instruction (indexing k + 1 / k + 2 directly could raise IndexError
    # when the stream ends in LOAD_CONST or LOAD_CONST, MAKE_FUNCTION).
    for load, make, store in zip(insts, insts[1:], insts[2:]):
        if (load.opname == 'LOAD_CONST'
                and make.opname == 'MAKE_FUNCTION'
                and store.opname == 'STORE_NAME'):
            co2.co_consts[load.arg].co_name = str(co2.co_names[store.arg])

    # Recursively fix up the code objects in `co_consts`.
    consts = list(co2.co_consts)
    for index, const in enumerate(consts):
        if xdis.code.iscode(const) and 'pyprotect' in const.co_filename:
            consts[index] = recursive_fix_stage3(const)

    co2.co_consts = tuple(consts)
    co2.freeze()

    return co2
 def _populate_lines(self):
     """Fill ``self.offsets``, ``self.instructions`` and ``self.lines``.

     Every instruction of ``self.code`` has its offset and itself
     recorded. Instructions are grouped into ``LineOffsets`` records: a
     new record is opened whenever an instruction has ``starts_line`` set,
     and the following instructions add their offsets to it. When
     ``self.include_children`` is true, a ``LineOffsetInfo`` is built for
     each code-object constant, registered in ``self.children`` under its
     ``name``, and its lines are appended to ``self.lines``.

     NOTE(review): the first instruction is expected to carry
     ``starts_line``; otherwise there is no open record to append to.
     """
     code_obj = self.code
     open_record = None
     instruction_stream = get_instructions_bytes(
         bytecode=code_obj.co_code,
         opc=self.opc,
         varnames=code_obj.co_varnames,
         names=code_obj.co_names,
         constants=code_obj.co_consts,
         cells=code_obj.co_cellvars + code_obj.co_freevars,
         linestarts=self.linestarts,
     )
     for inst in instruction_stream:
         position = inst.offset
         self.offsets.append(position)
         self.instructions.append(inst)

         if not inst.starts_line:
             # Same source line as before: extend the open record.
             open_record.offsets.append(position)
             continue

         # A new source line begins: flush the previous record first.
         if open_record:
             self.lines.append(open_record)
         open_record = LineOffsets(inst.starts_line, [position], code_obj)

     # Flush whatever record is still open after the last instruction.
     self.lines.append(open_record)

     if not self.include_children:
         return

     for const in code_obj.co_consts:
         if not iscode(const):
             continue
         child_info = LineOffsetInfo(self.opc, const, True)
         self.children[child_info.name] = child_info
         self.lines += child_info.lines
Beispiel #5
0
def remove_nops(co, opc, version):
    """Rebuild ``co`` with every NOP instruction removed.

    The instructions of ``co`` are disassembled, NOPs are skipped, and the
    remaining ones are re-emitted at recalculated offsets through an
    ``xasm`` ``Assembler``; the new code object comes from ``create_code``.

    Args:
        co: code object to rewrite. Its ``instructions`` attribute is
            reset to an empty list and then refilled.
        opc: opcode module passed to ``get_instructions_bytes``.
        version: bytecode version; passed to ``Assembler`` as ``str``.

    Returns:
        The code object produced by ``create_code``.
    """
    asm = Assembler(str(version))
    asm.code = co
    asm.code.instructions = []

    # Disassemble the original instructions, ignore NOPs, recalculate
    # absolute jumps, then append the rest to our assembler.
    cur_offset = 0
    for inst in get_instructions_bytes(asm.code, opc):
        # Compare strings by value; `is` tests object identity and only
        # happens to work when both strings are interned.
        if inst.opname == 'NOP':
            continue

        # Recalculate absolute jump arg.
        arg = inst.arg
        if inst.optype == 'jabs':
            # Keep the distance between the jump and its target the same
            # as in the original code. This is only correct when no NOP
            # was removed between the jump and its target.
            joff = inst.arg - inst.offset
            arg = cur_offset + joff

        # Create a new `xasm` Instruction.
        new_inst = Instruction()
        new_inst.opcode = inst.opcode
        new_inst.arg = arg
        new_inst.offset = cur_offset
        new_inst.line_no = 0

        # Add the instruction to the assembler.
        asm.code.instructions.append(new_inst)
        cur_offset += inst.inst_size

    code = create_code(asm, [], [])

    # HACK/FIX: xasm's `create_code` makes co_code a str on opcode version < 3,
    # when it should _probably_ be checking the interpreter version instead.
    # uncompyle6 requires this to be bytes-like, so we convert it.
    # Only convert when it really is text: calling ord() on the ints of an
    # already bytes-like co_code would raise TypeError.
    if sys.version_info > (3, 0, 0) and isinstance(code.co_code, str):
        code.co_code = bytes([ord(c) for c in code.co_code])

    return code
def disassemble_bytes(orig_msg, orig_msg_nocr, code, lasti=-1, cur_line=0,
                      start_line=-1, end_line=None, relative_pos=False,
                      varnames=(), names=(), constants=(), cells=(),
                      freevars=(), linestarts={}, highlight='light',
                      start_offset=0, end_offset=None):
    """Write a disassembly listing of the byte string ``code``.

    Each row is emitted piecewise through ``orig_msg_nocr`` and finished
    with ``orig_msg``. While the current position lies before the
    requested start (by line or by offset) both writers are replaced by a
    no-op, so those rows are computed but not shown.

    Args:
        orig_msg: callable used for the last piece of each row and for
            the empty row written before a new source line.
        orig_msg_nocr: callable used for all other pieces of a row.
        code: the bytecode to disassemble.
        lasti: offset that gets the ``-->`` marker.
        cur_line: line number compared with ``start_line`` before the
            first instruction is seen.
        start_line: rows of source lines below this are suppressed.
        end_line: the listing stops at the first source line above this;
            ``None`` means 10000.
        relative_pos: when true and ``end_line`` is given, ``end_line``
            is shifted by ``start_line - 1``.
        varnames, names, constants, cells, linestarts: forwarded to
            ``get_instructions_bytes``.
        freevars: accepted but not used here.
        highlight: forwarded to ``format_token``.
        start_offset: rows at offsets below this are suppressed.
        end_offset: the listing stops at the first offset above this.

    Returns:
        ``(code, offset)`` with the offset of the last instruction
        examined. ``offset`` is only bound inside the loop, so an empty
        instruction stream raises ``UnboundLocalError``.
    """
    if end_line is None:
        end_line = 10000
    elif relative_pos:
        end_line += start_line - 1

    labels = findlabels(code)

    def null_print(x):
        return None

    # (msg_nocr, msg) pairs for the suppressed and the visible state.
    muted = (null_print, null_print)
    visible = (orig_msg_nocr, orig_msg)

    msg_nocr, msg = muted if start_line > cur_line else visible

    for instr in get_instructions_bytes(code, opc, varnames, names,
                                        constants, cells, linestarts):
        offset = instr.offset
        if end_offset and offset > end_offset:
            break

        if instr.starts_line:
            # Separate source lines with an empty row, except at offset 0.
            if offset:
                msg("")

            cur_line = instr.starts_line
            before_start = start_line and (
                (start_line > cur_line)
                or start_offset and start_offset > offset)
            msg_nocr, msg = muted if before_start else visible

            # The end_offset part repeats the test at the top of the loop.
            if (cur_line > end_line) or (end_offset and offset > end_offset):
                break
            msg_nocr(format_token(Mformat.LineNumber,
                                  "%3d" % cur_line,
                                  highlight=highlight))
        else:
            # Within a line: start showing rows once start_offset is reached.
            if start_offset and offset and start_offset <= offset:
                msg_nocr, msg = visible
            msg_nocr('   ')

        # Current-instruction marker column.
        arrow = (format_token(Mformat.Arrow, '-->', highlight=highlight)
                 if offset == lasti else '   ')
        msg_nocr(arrow)

        # Jump-target marker column.
        target_mark = (format_token(Mformat.Arrow, '>>', highlight=highlight)
                       if offset in labels else '  ')
        msg_nocr(target_mark)

        msg_nocr(repr(offset).rjust(4))
        msg_nocr(' ')
        msg_nocr(format_token(Mformat.Opcode,
                              instr.opname.ljust(20),
                              highlight=highlight))
        msg_nocr(repr(instr.arg).ljust(10))
        msg_nocr(' ')
        # Show argval?
        msg(format_token(Mformat.Name,
                         instr.argrepr.ljust(20),
                         highlight=highlight))

    return code, offset
Beispiel #7
0
def disassemble_bytes(orig_msg, orig_msg_nocr, code, lasti=-1, cur_line=0,
                      start_line=-1, end_line=None, relative_pos=False,
                      varnames=(), names=(), constants=(), cells=(),
                      freevars=(), linestarts={}, highlight='light',
                      start_offset=0, end_offset=None):
    """Write a disassembly listing of the byte string ``code``.

    Each row is emitted piecewise through ``orig_msg_nocr`` and finished
    with ``orig_msg``. While the current position lies before the
    requested start (by line or by offset) both writers are replaced by a
    no-op, so those rows are computed but not shown. The line-number
    column of this listing is four characters wide.

    Args:
        orig_msg: callable used for the last piece of each row and for
            the empty row written before a new source line.
        orig_msg_nocr: callable used for all other pieces of a row.
        code: the bytecode to disassemble.
        lasti: offset that gets the ``-->`` marker.
        cur_line: line number compared with ``start_line`` before the
            first instruction is seen.
        start_line: rows of source lines below this are suppressed.
        end_line: the listing stops at the first source line above this;
            ``None`` means 10000.
        relative_pos: when true and ``end_line`` is given, ``end_line``
            is shifted by ``start_line - 1``.
        varnames, names, constants, cells, linestarts: forwarded to
            ``get_instructions_bytes``.
        freevars: accepted but not used here.
        highlight: forwarded to ``format_token``.
        start_offset: rows at offsets below this are suppressed.
        end_offset: the listing stops at the first offset above this.

    Returns:
        ``(code, offset)`` with the offset of the last instruction
        examined. ``offset`` is only bound inside the loop, so an empty
        instruction stream raises ``UnboundLocalError``.
    """
    if end_line is None:
        end_line = 10000
    elif relative_pos:
        end_line += start_line - 1

    labels = findlabels(code)

    def null_print(x):
        return None

    # (msg_nocr, msg) pairs for the suppressed and the visible state.
    muted = (null_print, null_print)
    visible = (orig_msg_nocr, orig_msg)

    msg_nocr, msg = muted if start_line > cur_line else visible

    for instr in get_instructions_bytes(code, opc, varnames, names,
                                        constants, cells, linestarts):
        offset = instr.offset
        if end_offset and offset > end_offset:
            break

        if instr.starts_line:
            # Separate source lines with an empty row, except at offset 0.
            if offset:
                msg("")

            cur_line = instr.starts_line
            before_start = start_line and (
                (start_line > cur_line)
                or start_offset and start_offset > offset)
            msg_nocr, msg = muted if before_start else visible

            # The end_offset part repeats the test at the top of the loop.
            if (cur_line > end_line) or (end_offset and offset > end_offset):
                break
            msg_nocr(format_token(Mformat.LineNumber,
                                  "%4d" % cur_line,
                                  highlight=highlight))
        else:
            # Within a line: start showing rows once start_offset is reached.
            if start_offset and offset and start_offset <= offset:
                msg_nocr, msg = visible
            msg_nocr('    ')

        # Current-instruction marker column.
        arrow = (format_token(Mformat.Arrow, '-->', highlight=highlight)
                 if offset == lasti else '   ')
        msg_nocr(arrow)

        # Jump-target marker column.
        target_mark = (format_token(Mformat.Arrow, '>>', highlight=highlight)
                       if offset in labels else '  ')
        msg_nocr(target_mark)

        msg_nocr(repr(offset).rjust(4))
        msg_nocr(' ')
        msg_nocr(format_token(Mformat.Opcode,
                              instr.opname.ljust(20),
                              highlight=highlight))
        msg_nocr(repr(instr.arg).ljust(10))
        msg_nocr(' ')
        # Show argval?
        msg(format_token(Mformat.Name,
                         instr.argrepr.ljust(20),
                         highlight=highlight))

    return code, offset