예제 #1
0
 def _addcompare(self, opname, opcode, arg, argrepr, target):
     """Append a comparison instruction and return ``_addinstr``'s result.

     ``arg`` is passed through ``self._mapcompare``, which yields the
     operand to store together with its resolved value (``argval``).
     When no ``argrepr`` is supplied, the resolved value doubles as the
     display text.
     """
     arg, argval = self._mapcompare(arg)
     if not argrepr:
         argrepr = argval
     # The argval slot must carry the mapped value; passing argrepr here
     # silently discarded it whenever a custom display text was supplied.
     instr = dis.Instruction(opname, opcode, arg, argval, argrepr,
                             self._offset, self._line, target)
     return self._addinstr(instr)
예제 #2
0
def mk_extended_arg(arg, extended):
    """Build an ``EXTENDED_ARG`` instruction carrying ``arg``.

    ``arg`` is stored as both the raw argument and ``argval``;
    ``argrepr`` is ``None``.  The offset, starting line and jump-target
    flag are copied from ``extended``.
    """
    fields = {
        'opname': 'EXTENDED_ARG',
        'opcode': dis.EXTENDED_ARG,
        'arg': arg,
        'argval': arg,
        'argrepr': None,
    }
    # Location metadata is taken over from the instruction being extended.
    for name in ('offset', 'starts_line', 'is_jump_target'):
        fields[name] = getattr(extended, name)
    return dis.Instruction(**fields)
예제 #3
0
 def _addmisc(self, opname, opcode, arg, argrepr, target):
     """Append an instruction whose operand is a plain integer.

     The integer form of ``arg`` is stored as both ``arg`` and
     ``argval``.  When ``argrepr`` is empty, the operand as originally
     given is used as the display text.

     Raises:
         SyntaxError: if ``int(arg)`` raises ``ValueError``.
     """
     # Captured before conversion so the display text keeps the raw form.
     shown = argrepr if argrepr else arg
     try:
         number = int(arg)
     except ValueError:
         raise SyntaxError("{} requires numeric arg, not {}".format(
             opname, arg))
     fields = (opname, opcode, number, number, shown,
               self._offset, self._line, target)
     instr = dis.Instruction(*fields)
     return self._addinstr(instr)
예제 #4
0
def _make_stable(gen):
    """Yield a rebuilt copy of each instruction from ``gen``.

    Each copy keeps the source's opname, opcode, arg, argval, offset,
    starts_line and is_jump_target, while ``argrepr`` is replaced by
    ``_stable_repr(argval)``.
    """
    for source in gen:
        value = source.argval
        rebuilt = _dis.Instruction(
            source.opname,
            source.opcode,
            source.arg,
            value,
            _stable_repr(value),
            source.offset,
            source.starts_line,
            source.is_jump_target,
        )
        yield rebuilt
예제 #5
0
 def _addnamed(self, opname, opcode, arg, argrepr, target, addnames):
     """Append an instruction whose operand indexes one of the name tables.

     The table is chosen by which opcode set contains ``opcode``:
     ``constants``, ``freevars``, ``names`` or ``varnames``.

     ``arg`` may be either:

     * something ``int()`` accepts - used directly as the index; if the
       index is not present in the table, ``argval`` becomes ``'#<index>'``;
     * a symbolic value - looked up in the table.  A leading ``'#'``
       means the remainder is parsed with ``ast.literal_eval`` first.
       Unknown values are appended to the table when ``addnames`` is
       true.

     When ``argrepr`` is empty, ``argval`` is used as the display text.

     Raises:
         SyntaxError: if ``opcode`` belongs to none of the four tables,
             or a symbolic value is unknown and ``addnames`` is false.
     """
     if opcode in self._hasconst:
         namelist = self.constants
     elif opcode in self._hasfree:
         namelist = self.freevars
     elif opcode in self._hasname:
         namelist = self.names
     elif opcode in self._haslocal:
         namelist = self.varnames
     else:
         # Without this branch ``namelist`` stayed unbound and the code
         # below failed with an unrelated-looking UnboundLocalError.
         raise SyntaxError(
             "{} does not take a named arg".format(opname))

     try:
         index = int(arg)
     except ValueError:
         index = None

     if index is not None:
         # Numeric operand: resolve the value from the table if possible.
         arg = index
         try:
             argval = namelist[arg]
         except LookupError:
             # TODO: Warn about this?
             argval = '#{}'.format(arg)
     else:
         # Symbolic operand: find (or optionally register) its index.
         if arg[0] == '#':
             arg = ast.literal_eval(arg[1:])
         argval = arg
         try:
             arg = namelist.index(argval)
         except ValueError:
             if addnames:
                 arg = len(namelist)
                 namelist.append(argval)
             else:
                 raise SyntaxError("No such name '{}'".format(argval))

     if not argrepr:
         argrepr = argval
     instr = dis.Instruction(opname, opcode, arg, argval, argrepr,
                             self._offset, self._line, target)
     return self._addinstr(instr)
예제 #6
0
def _make_stable(
    gen: Iterable[_dis.Instruction],
) -> Generator[_dis.Instruction, None, None]:
    """Yield a rebuilt copy of each instruction from ``gen``.

    Each copy keeps the source's opname, opcode, arg, argval, offset,
    starts_line and is_jump_target, while ``argrepr`` is replaced by
    ``_stable_repr(argval)``.
    """
    for source in gen:
        value = source.argval
        rebuilt = _dis.Instruction(
            source.opname,
            source.opcode,
            source.arg,
            value,
            _stable_repr(value),
            source.offset,
            source.starts_line,
            source.is_jump_target,
        )
        yield rebuilt
예제 #7
0
def new_instruction(instr,
                    *,
                    opname=None,
                    opcode=None,
                    arg=None,
                    argval=None,
                    argrepr=None,
                    offset=None,
                    starts_line=None,
                    is_jump_target=None):
    """Return a copy of ``instr`` with selected fields replaced.

    Creates a new instruction since namedtuples aren't mutable.  Every
    keyword left as ``None`` keeps the corresponding value from
    ``instr``; any other value - including falsy ones such as ``0``,
    ``''`` or ``False`` - replaces it.
    """
    # ``is None`` rather than ``or``: with ``or`` an override of 0, '' or
    # False was silently dropped in favour of the old value.
    return dis.Instruction(
        instr.opname if opname is None else opname,
        instr.opcode if opcode is None else opcode,
        instr.arg if arg is None else arg,
        instr.argval if argval is None else argval,
        instr.argrepr if argrepr is None else argrepr,
        instr.offset if offset is None else offset,
        instr.starts_line if starts_line is None else starts_line,
        instr.is_jump_target if is_jump_target is None else is_jump_target,
    )
예제 #8
0
            arg = code[i + 1] | extended_arg
        else:
            arg = None
        # in stock dis, this is done only in the HAVE_ARGUMENT branch
        # and that is wrong, since it is different from ceval.c logic
        extended_arg = (arg << 8) if op == dis.EXTENDED_ARG else 0
        yield (i, op, arg)


# Monkeypatch: rebind the private ``_unpack_opargs`` helper on the standard
# library ``dis`` module to this module's implementation.  Being a module
# attribute assignment, this affects every user of ``dis`` in the process.
dis._unpack_opargs = _unpack_opargs

# Code point of the character 'S'.
# NOTE(review): how this constant is consumed is not visible in this chunk.
ANY_ASCII = ord('S')
# An instruction record with every field set to None.
empty_instr = dis.Instruction(opname=None,
                              opcode=None,
                              arg=None,
                              argval=None,
                              argrepr=None,
                              offset=None,
                              starts_line=None,
                              is_jump_target=None)


class CodeWrapper:
    """Wrap an object and overlay extra attributes on top of it.

    Keyword arguments given to the constructor become instance
    attributes.  Any attribute not found on the wrapper itself is looked
    up on the wrapped ``code`` object.
    """

    def __init__(self, code, **attrs):
        self.__dict__.update(attrs)
        # Assigned after the update so an ``attrs`` entry named "code"
        # cannot replace the wrapped object.
        self.code = code

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails.  If "code" itself
        # is missing (an instance created without __init__, as copy and
        # pickle do), delegating to ``self.code`` would re-enter this
        # method until RecursionError; fail with AttributeError instead.
        if attr == 'code':
            raise AttributeError(attr)
        return getattr(self.code, attr)


class Transcoder:
예제 #9
0
    def __init__(self, code):
        """Build ``self.basic_blocks`` from the bytecode of ``code``.

        One ``BasicBlock`` is created per reachable instruction and stored
        under its offset.  For each block the successors, block-stack view
        and path metadata are obtained from ``compute_jump_targets``, fed
        with the values joined from the block's predecessors.
        """

        self.__edge_num = {}

        bytecode = dis.Bytecode(code)
        # Offset -1 holds a synthetic FUNCTION_EXIT block.
        self.basic_blocks = {
            -1:
            BasicBlock(
                dis.Instruction('FUNCTION_EXIT', 0, 0, '', '', -1, 0, False))
        }

        self._blockstack = BlockStack()

        # Offsets of instructions known to be reachable; offset 0 is the
        # entry point.
        reachable_instructions = {0}

        # Targets of jump instructions that were themselves unreachable.
        unreachable_jump_targets = set()

        def is_reachable(instr):
            """Return True when ``instr`` counts as reachable.

            An instruction is reachable if its offset was recorded as a
            successor, or if it is a jump target that is not known to be
            targeted by an unreachable jump.  Otherwise the function falls
            through and returns None, which callers treat as false.
            """
            if instr.offset in reachable_instructions:
                return True
            elif instr.is_jump_target:
                if instr.offset in unreachable_jump_targets:
                    return False
                return True

        def predecessors_of(current_bb):
            """Yield every known block listing ``current_bb`` as a successor."""
            for bb in self.basic_blocks.values():
                if current_bb.offset in bb.successors:
                    yield bb

        def join_blockstack_views(current_bb):
            """Return the block-stack view ``current_bb`` inherits.

            Each reachable predecessor contributes the last block of its
            view, popped until ``current_bb.offset`` when that offset is on
            the stack.  All predecessors must agree on that block.
            """
            blocks = set()

            for bb in predecessors_of(current_bb):
                if not is_reachable(bb.instruction):
                    continue

                try:
                    view = bb.blockstack_view.pop_until(current_bb.offset)
                except NotOnStackException:
                    # Nothing to pop for this offset: use the view unchanged.
                    view = bb.blockstack_view

                blocks.add(view.last_block)

            # Predecessors disagreeing on the enclosing block means the
            # analysis is inconsistent.
            assert len(blocks) <= 1, \
                (blocks, current_bb, list(predecessors_of(current_bb)))

            if blocks:
                last_block = blocks.pop()
            else:
                last_block = None

            return BlockStackView(self._blockstack, last_block)

        def join_path_metadata(current_bb):
            """Merge the path metadata of all predecessors of ``current_bb``.

            The 'has return' and 'has except' flags are set if any
            predecessor has them; the 'broken loops' lists are concatenated.
            """
            metadata = {}
            for bb in predecessors_of(current_bb):
                if bb.path_metadata.get('has return', False):
                    metadata['has return'] = True
                if bb.path_metadata.get('has except', False):
                    metadata['has except'] = True

                broken_loops = bb.path_metadata.get('broken loops', [])
                metadata.setdefault('broken loops', []).extend(broken_loops)

            return metadata

        for instr in bytecode:

            if not is_reachable(instr):
                # Remember where an unreachable jump points so that its
                # target is not treated as reachable merely for being a
                # jump target.
                if instr.opname in ops.jumps:
                    unreachable_jump_targets.add(instr.argval)

                continue

            bb = BasicBlock(instr)
            self.basic_blocks[bb.offset] = bb

            # TODO: maintain path metadata (stuff like whether there's a
            # RETURN_VALUE along the path) that gets inherited like the
            # blockstack view

            successors, new_metadata, blockstack_view = compute_jump_targets(
                instr,
                join_path_metadata(bb),
                join_blockstack_views(bb),
            )

            # Everything this instruction can flow to becomes reachable.
            reachable_instructions.update(set(successors))

            bb.successors = successors
            bb.blockstack_view = blockstack_view
            bb.path_metadata = new_metadata
예제 #10
0
파일: pyccfg.py 프로젝트: vrthra/pyccfg
    def __init__(self, myfn):
        """Build a node graph from the disassembled instructions of ``myfn``.

        Every instruction becomes a ``CFGNode`` stored in ``self.opcodes``
        under ``index * 2``.  Nodes are chained in instruction order,
        starting from a synthetic NOP node.  Jump instructions are recorded
        in ``self.jump_to`` (destination -> list of jumping nodes) and are
        linked to their destination nodes once all nodes exist.  Progress
        is printed while processing.  An opcode that is not handled trips
        an assertion.
        """
        # Synthetic entry node that precedes the first real instruction.
        entry_instruction = dis.Instruction('NOP',
                                            opcode=dis.opmap['NOP'],
                                            arg=0,
                                            argval=0,
                                            argrepr=0,
                                            offset=0,
                                            starts_line=0,
                                            is_jump_target=False)
        enter = CFGNode(entry_instruction, 0)

        # Opcodes that only need to be chained to their predecessor.
        chained_only = (
            'LOAD_CONST', 'LOAD_FAST', 'STORE_FAST', 'COMPARE_OP',
            'INPLACE_ADD', 'INPLACE_SUBTRACT', 'RETURN_VALUE',
            'BINARY_MODULO', 'POP_BLOCK',
        )
        # Jumps whose operand is used directly as the destination key.
        direct_jumps = ('POP_JUMP_IF_FALSE', 'JUMP_ABSOLUTE')

        previous = enter
        self.jump_to = {}
        self.opcodes = {}
        for index, ins in enumerate(dis.get_instructions(myfn)):
            byte = index * 2
            node = CFGNode(ins, byte)
            self.opcodes[byte] = node
            print(index, ins)

            name = ins.opname
            if name in chained_only:
                pass
            elif name in direct_jumps:
                print("will jump to", ins.arg)
                self.jump_to.setdefault(ins.arg, []).append(node)
                node.props['jmp'] = True
            elif name == 'JUMP_FORWARD':
                node.props['jmp'] = True
                # The operand is relative to the following instruction.
                destination = (index + 1) * 2 + ins.arg
                self.jump_to.setdefault(destination, []).append(node)
                print("will jump to", destination)
            elif name == 'SETUP_LOOP':
                print("setuploop: ", byte, ins.arg)
            else:
                assert False
                # Only reached with assertions disabled: leave the node
                # unchained, as no branch handles it.
                continue

            previous.add_child(node)
            previous = node

        # Second pass: connect every jumping node to its destination node.
        for byte, node in self.opcodes.items():
            if byte not in self.jump_to:
                continue
            assert node.i.is_jump_target
            for jumper in self.jump_to[byte]:
                jumper.add_child(node)
예제 #11
0
    def execute(self, starting_stack=None, starting_env=None):
        """Run the parent ``execute`` and decompile the loop body separately.

        The loop's instructions are handed to a dedicated ``Decompiler``.
        In the resulting graph, edges that return to the first block are
        redirected to a placeholder expressing ``on_loop``, and the block
        following the loop is replaced by a placeholder expressing
        ``on_after``.  The two placeholders and the decompiler are stored
        on ``self`` as ``loop_placeholder``, ``after_placeholder`` and
        ``decompiler``.

        Args:
            starting_stack: forwarded to the parent ``execute``; a new
                empty list when omitted.
            starting_env: forwarded to the parent ``execute``; a new
                empty dict when omitted.
        """
        # Fresh containers per call: literal ``[]`` / ``{}`` defaults are
        # single objects shared by every call that omits the argument.
        if starting_stack is None:
            starting_stack = []
        if starting_env is None:
            starting_env = {}
        super().execute(starting_stack, starting_env)

        # We use a separate instance of the decompiler to process the code
        # inside the loop. We have to add a placeholder for the instruction
        # following the end of the loop.
        instructions = self.instructions + \
            [dis.Instruction('AFTER_LOOP', -1, None, None,
             None, self.instruction.argval, None, True)]

        # For some reason, breaks are translated into BREAK_LOOP instructions
        # instead of the standard JUMP_ABSOLUTE, so we must fix that manually.
        for i, instr in enumerate(instructions):
            if instr.opname == 'BREAK_LOOP':
                # Jump to the offset given by this loop instruction's argval,
                # which is also the offset of the AFTER_LOOP placeholder.
                instructions[i] = dis.Instruction(
                    'JUMP_ABSOLUTE', 113, self.instruction.argval,
                    self.instruction.argval, None, instr.offset,
                    instr.starts_line, instr.is_jump_target)

        decompiler = Decompiler()
        decompiler.comprehension_mode = self.context.comprehension_mode
        decompiler.build_graph(instructions, True)

        start_block = decompiler.first_block
        last_block = decompiler.current_block

        decompiler.sort_blocks()
        decompiler.detach_unreachable()

        # display_graph(decompiler)

        # We then identify the edges which jump back to the start_block, and
        # make them point to a placeholder block instead. This block, once
        # expressed, will turn into a call to on_loop.
        loop_placeholder = PlaceholderBlock(
            decompiler, Application(Identifier('on_loop'), Null()))

        decompiler.blocks.append(loop_placeholder)

        previous_predecessors = start_block.predecessors
        start_block.predecessors = []

        for (predecessor, edge_type) in previous_predecessors:
            if predecessor.index < start_block.index:
                # Edge from an earlier block: not a back edge, keep it.
                start_block.predecessors.append((predecessor, edge_type))
            elif edge_type == JUMP_FLOW:
                predecessor.next_jumped = loop_placeholder
                loop_placeholder.predecessors.append(
                    (predecessor, JUMP_FLOW))
            else:
                predecessor.next = loop_placeholder
                loop_placeholder.predecessors.append(
                    (predecessor, NORMAL_FLOW))

        # We also replace all the references to the last block, which only
        # contains the AFTER_LOOP instruction that we added earlier, with a
        # placeholder block which will turn into a call to on_after.
        after_placeholder = PlaceholderBlock(
            decompiler, Application(Identifier('on_after'), Null()))

        after_placeholder.index = last_block.index
        decompiler.blocks[last_block.index] = after_placeholder

        for (predecessor, edge_type) in last_block.predecessors:
            if edge_type == JUMP_FLOW:
                predecessor.next_jumped = after_placeholder
                after_placeholder.predecessors.append(
                    (predecessor, JUMP_FLOW))
            else:
                predecessor.next = after_placeholder
                after_placeholder.predecessors.append(
                    (predecessor, NORMAL_FLOW))

        # This is not pretty, but we must remove the edge that is created
        # between a block and the one which follows it.
        loop_placeholder.next = None
        after_placeholder.next = None

        # display_graph(decompiler)

        self.loop_placeholder, self.after_placeholder, self.decompiler =\
            loop_placeholder, after_placeholder, decompiler
예제 #12
0
 def _addjump(self, opname, opcode, arg, argrepr, target):
     """Append a jump instruction and return ``_addinstr``'s result.

     ``arg`` is stored as given and ``argrepr`` fills both the ``argval``
     and ``argrepr`` fields.
     """
     # Nothing much to do here; see _fixup for the hard bit
     fields = (opname, opcode, arg, argrepr, argrepr,
               self._offset, self._line, target)
     instr = dis.Instruction(*fields)
     return self._addinstr(instr)
예제 #13
0
 def _addnoarg(self, opname, opcode, target):
     """Append an instruction that takes no operand.

     ``arg``, ``argval`` and ``argrepr`` are all ``None``.
     """
     no_operand = (None,) * 3  # arg, argval, argrepr
     instr = dis.Instruction(opname, opcode, *no_operand,
                             self._offset, self._line, target)
     return self._addinstr(instr)
예제 #14
0
    def instrument(bytecode):
        """
        Inject a call to ``__instrument__`` (the bytecode counter) into a code object.

        Nested code objects found in ``co_consts`` are instrumented recursively.
        Jump arguments and the line-number table are recomputed for the new
        instruction layout.

        :param bytecode: a code object, the bytecode submitted by the player
        :return: the result of ``Instrument.build_code`` for the instrumented
            instructions, names, constants and line table
        :raises SyntaxError: if a jump instruction targets itself
        """

        # Ensure all code constants (e.g. list comprehensions) are also instrumented.
        new_consts = []
        for i, constant in enumerate(bytecode.co_consts):
            if type(constant) == CodeType:
                new_consts.append(Instrument.instrument(constant))
            else:
                new_consts.append(constant)
        new_consts = tuple(new_consts)

        instructions = list(dis.get_instructions(bytecode))

        function_name_index = len(bytecode.co_names)  # '__instrument__' is appended at the end of co_names, so this is its index

        # The injection: load the global __instrument__, call it with no
        # arguments and discard the result.
        injection = [
            dis.Instruction(opcode=116, opname='LOAD_GLOBAL', arg=function_name_index%256, argval='__instrument__', argrepr='__instrument__', offset=None, starts_line=None, is_jump_target=False),
            dis.Instruction(opcode=131, opname='CALL_FUNCTION', arg=0, argval=0, argrepr=0, offset=None, starts_line=None, is_jump_target=False),
            dis.Instruction(opcode=1, opname='POP_TOP', arg=None, argval=None, argrepr=None, offset=None, starts_line=None, is_jump_target=False)
        ]
        # An oparg holds one byte (max 255 = 2^8 - 1).  For a larger name
        # index, prepend one opcode-144 prefix instruction per additional
        # byte, most significant byte first.
        while function_name_index > 255: #(255 = 2^8 -1 = 1 oparg)
            function_name_index >>= 8
            injection = [
                dis.Instruction(
                    opcode=144,
                    opname='EXTENDED_ARGS',
                    arg=function_name_index%256,
                    argval=function_name_index%256,
                    argrepr=function_name_index%256,
                    offset=None,
                    starts_line=None,
                    is_jump_target=False
                )
            ] + injection

        # Wrap every dis instruction in the project's Instruction class.
        # NOTE(review): per the original comment this adds an (initially
        # empty) jump_to field; the wrapper is defined elsewhere.
        for i, instruction in enumerate(instructions):
            instructions[i] = Instruction(instruction)

        # Next, cache on every jumping instruction a reference to the
        # instruction it jumps to.
        for i, instruction in enumerate(instructions):
            # We're only looking for jumpers
            if not instruction.is_jumper():
                continue

            # First instruction whose offset equals the jump's argval.
            # NOTE(review): the [0] raises IndexError if no instruction
            # starts at that offset.
            target = [t for t in instructions if instruction.argval == t.offset][0]
            instruction.jump_to = target

            # An instruction that jumps to itself is rejected.
            if instruction == target:
                raise SyntaxError('No self-referential loops.')

        unsafe = {110, 113, 114, 115, 116, 120, 124, 125, 131}  # bytecode ops that break the instrument

        # Insert the injection before every original instruction, except
        # those whose predecessor is opcode 144 (the extended-arg prefix) or
        # one of the unsafe opcodes.  The loop walks a snapshot of the
        # original list while cur_index tracks the position of `cur` in the
        # growing list.
        cur_index = -1
        for (cur, last) in zip(instructions[:], [None]+instructions[:-1]):
            cur_index += 1
            if last is not None and last.opcode == 144: #EXTEND_ARG
                continue

            if last is not None and last.opcode in unsafe:
                continue

            for j, inject in enumerate(injection):
                injected_instruction = Instruction(inject)
                injected_instruction.was_there = False # marks instructions added by us
                instructions.insert(cur_index + j, injected_instruction)
            cur_index += len(injection)


        # Iterate through the instructions.  For each jumper, calculate the
        # new correct offset.  If it is too large to fit in the prefix
        # instructions already in front of it, insert a new one before it.
        # Repeat the whole pass until one completes without inserting
        # anything, since each insertion shifts later offsets.
        fixed = False
        while not fixed:
            fixed = True

            i = 0
            for instruction in instructions[:]:
                # Each instruction occupies two bytes.
                instruction.offset = 2 * i

                if not instruction.is_jumper():
                    i += 1
                    continue

                correct_offset = instruction.calculate_offset(instructions)
                # The low byte goes into the jump itself; the remaining
                # bytes go into the prefix instructions preceding it.
                instruction.arg = correct_offset % 256
                correct_offset >>= 8

                extended_args = 0
                while correct_offset > 0:
                    # Check if there is already a prefix instruction at this distance behind
                    if i > extended_args and instructions[i - extended_args - 1].opcode == 144:
                        instructions[i - extended_args - 1].arg = correct_offset % 256

                    # Otherwise, insert a new one
                    else:
                        instructions.insert(i, Instruction.ExtendedArgs(correct_offset % 256))
                        instruction.extra_extended_args += 1
                        i += 1
                        fixed = False

                    correct_offset >>= 8
                    extended_args += 1
                i += 1
        # Maintain correct line information (traceback bug fix).
        # co_lnotab is a byte string of alternating values: an increment of
        # the bytecode address followed by an increment of the line number.
        # Every value must fit in a byte, so a large increment has to be
        # split across several pairs (handled further down).
        # The address increments are stale after the injection and must be
        # recomputed for the new layout.
        # NOTE (from the original author): this fix overestimates the number
        # of instructions before the start of each line.
        # Background: https://towardsdatascience.com/understanding-python-bytecode-e7edaae8734d
        # NOTE(review): line increments are read below as unsigned bytes;
        # CPython 3.6+ stores them signed - confirm negative deltas cannot
        # occur in the submitted code.
        old_lnotab = {} # maps accumulated address increment -> accumulated line increment
        i = 0
        line_num = 0 # running sum of the line increments
        instruction_num = 0 # running sum of the address increments
        while 2*i < len(bytecode.co_lnotab):
            instruction_num += bytecode.co_lnotab[2 * i]
            line_num += bytecode.co_lnotab[2 * i + 1]
            old_lnotab[instruction_num] = line_num
            i += 1
        # Construct a map from old byte offsets to new ones: an instruction
        # flagged was_there that sits at list index k, with m instructions
        # not flagged was_there before it, moves from offset 2*(k-m) to 2*k.
        num_injected = 0
        instruction_index = 0
        old_to_new_instruction_num = {}
        for instruction in instructions:
            if instruction.was_there:
                old_to_new_instruction_num[2 * (instruction_index - num_injected)] = 2 * instruction_index
            instruction_index += 1
            if not instruction.was_there:
                num_injected += 1
        # NOTE(review): raises KeyError if a line-table offset has no entry
        # in the map built above.
        new_lnotab = {}
        for key in old_lnotab:
            new_lnotab[old_to_new_instruction_num[key]] = old_lnotab[key]

        # Turn the absolute (offset, line) pairs back into a list of
        # increments, splitting any increment above 127 into several
        # entries.
        pairs = sorted(new_lnotab.items())
        new_lnotab = []
        previous_pair = (0, 0)
        for pair in pairs:
            num_instructions = pair[0] - previous_pair[0]
            num_lines = pair[1] - previous_pair[1]
            # Emit (127, 0) pairs until the address increment fits.
            while num_instructions > 127:
                new_lnotab.append(127)
                new_lnotab.append(0)
                num_instructions -= 127
            new_lnotab.append(num_instructions)
            # The 127 appended here pairs with the address increment just
            # written; the following 0 starts a new pair that receives the
            # remaining line increment.
            while num_lines > 127:
                new_lnotab.append(127)
                new_lnotab.append(0)
                num_lines -= 127
            new_lnotab.append(num_lines)
            previous_pair = pair
        # Convert the list of increments to bytes.
        new_lnotab = bytes(new_lnotab)

        # Finally, we repackage up our instructions into a byte string and use it to build a new code object
        # Each instruction contributes its opcode and the low byte of its
        # argument (0 when it has none).
        byte_array = [[inst.opcode, 0 if inst.arg is None else inst.arg % 256] for inst in instructions]
        new_code = bytes(sum(byte_array, []))

        # Make sure our code can locate the __instrument__ call
        new_names = tuple(bytecode.co_names) + ('__instrument__', )

        return Instrument.build_code(bytecode, new_code, new_names, new_consts, new_lnotab)