def compare_bytes_sequence(self, code1, code2, inserted_code_size):
        """
        Compare the bytecode produced by the modification with the reference bytecode.

        The modified code contains a manually inserted POP_JUMP_IF_TRUE where the reference code has POP_TOP,
        so the two sequences cannot be compared strictly; a few well-defined differences are tolerated.

        :param code1: result code after modification (byte sequence accepted by ``dis._unpack_opargs``)
        :param code2: a real code for checking (byte sequence accepted by ``dis._unpack_opargs``)
        :param inserted_code_size: size of inserted code
        :raises AssertionError: if the sequences differ in length, opcodes or (non-tolerated) arguments
        """
        seq1 = list(dis._unpack_opargs(code1))
        seq2 = list(dis._unpack_opargs(code2))
        # Use the TestCase API rather than a bare ``assert`` so the check survives ``python -O``.
        self.assertEqual(len(seq1), len(seq2), "Bytes sequences have different lengths")
        for (of, op1, arg1), (_, op2, arg2) in zip(seq1, seq2):
            if op1 != op2:
                if op1 == 115 and op2 == 1:
                    # it's ok, because we added POP_JUMP_IF_TRUE manually, but it's POP_TOP in the real code
                    # inserted code - 2 (removed return instruction) - real code inserted
                    # Jump should be done to the beginning of inserted fragment
                    self.assertEqual(arg1, of - (inserted_code_size - 2))
                    continue
                if op1 == EXTENDED_ARG and op2 == 12:
                    # we added a real UNARY_NOT to balance EXTENDED_ARG added by new jump instruction
                    # i.e. inserted code size was increased as well
                    inserted_code_size += 2
                    continue

            self.assertEqual(op1, op2, "Different operators at offset {}".format(of))
            # Sometimes indexes of variable names and consts may be different, when we insert them, it's ok
            if arg1 != arg2 and op1 not in (100, 101, 106, 116):
                # ``assertEquals`` was a deprecated alias and no longer exists in Python 3.12+.
                self.assertEqual(arg1, arg2, "Different arguments at offset {}".format(of))
    def compare_bytes_sequence(self, code1, code2, inserted_code_size):
        """
        Check that the modified bytecode matches the reference bytecode, up to the expected differences.

        A POP_JUMP_IF_TRUE instruction is inserted manually where the reference code has POP_TOP, so an exact
        comparison is impossible; the known deviations are accepted and everything else must be equal.

        :param code1: result code after modification (byte sequence accepted by ``dis._unpack_opargs``)
        :param code2: a real code for checking (byte sequence accepted by ``dis._unpack_opargs``)
        :param inserted_code_size: size of inserted code
        :raises AssertionError: if the sequences differ in length, opcodes or (non-tolerated) arguments
        """
        seq1 = list(dis._unpack_opargs(code1))
        seq2 = list(dis._unpack_opargs(code2))
        self.assertEqual(len(seq1), len(seq2), "Bytes sequences have different lengths")
        for (of, op1, arg1), (_, op2, arg2) in zip(seq1, seq2):
            if op1 != op2:
                if op1 == 115 and op2 == 1:
                    # it's ok, because we added POP_JUMP_IF_TRUE manually, but it's POP_TOP in the real code
                    # inserted code - 2 (removed return instruction) - real code inserted
                    # Jump should be done to the beginning of inserted fragment
                    self.assertEqual(arg1, of - (inserted_code_size - 2))
                    continue
                if op1 == EXTENDED_ARG and op2 == 12:
                    # we added a real UNARY_NOT to balance EXTENDED_ARG added by new jump instruction
                    # i.e. inserted code size was increased as well
                    inserted_code_size += 2
                    continue

            self.assertEqual(op1, op2, "Different operators at offset {}".format(of))
            # Sometimes indexes of variable names and consts may be different, when we insert them, it's ok
            if arg1 != arg2 and op1 not in (100, 101, 106, 116):
                # ``assertEquals`` was a deprecated alias and no longer exists in Python 3.12+.
                self.assertEqual(arg1, arg2, "Different arguments at offset {}".format(of))
 def compare_bytes_sequence(self, code1, code2):
     """Assert that two bytecode sequences are instruction-by-instruction equivalent.

     Opcodes must match exactly. Arguments must match too, except for opcodes
     100, 101, 106 and 116, whose name/const indexes may legitimately differ.

     :param code1: first byte sequence accepted by ``dis._unpack_opargs``
     :param code2: second byte sequence accepted by ``dis._unpack_opargs``
     :raises AssertionError: on any length, opcode or argument mismatch
     """
     seq1 = list(dis._unpack_opargs(code1))
     seq2 = list(dis._unpack_opargs(code2))
     self.assertTrue(len(seq1) == len(seq2), "Bytes sequences have different lengths")
     for (of, op1, arg1), (_, op2, arg2) in zip(seq1, seq2):
         self.assertEqual(op1, op2, "Different operators at offset {}".format(of))
         # Sometimes indexes of variable names and consts may be different, when we insert them, it's ok
         if arg1 != arg2 and op1 not in (100, 101, 106, 116):
             # ``assertEquals`` was a deprecated alias and no longer exists in Python 3.12+.
             self.assertEqual(arg1, arg2, "Different arguments at offset {}".format(of))
Esempio n. 4
0
def decode_codeobj(codeobj):
    """Yield ``(opname, argval)`` for every instruction in ``codeobj.co_code``.

    ``argval`` is the resolved argument where the opcode category is known
    (constant, name, relative jump target, local, comparison operator, free
    variable). For any other opcode it is the raw argument, and ``None`` for
    instructions that carry no argument.
    """
    # adapted from dis.dis
    if is_py3k:
        codestr = codeobj.co_code
    else:
        codestr = [ord(ch) for ch in codeobj.co_code]
    free = None
    for i, op, oparg in _unpack_opargs(codestr):
        opname = opcode.opname[op]
        # Start from the raw argument on every iteration. Previously ``argval``
        # was left unbound (or stale from the previous instruction) whenever the
        # opcode had no argument or fell outside the categories below.
        argval = oparg
        if oparg is not None:
            if op in opcode.hasconst:
                argval = codeobj.co_consts[oparg]
            elif op in opcode.hasname:
                argval = codeobj.co_names[oparg]
            elif op in opcode.hasjrel:
                argval = i + oparg + CODE_HAVEARG_SIZE
            elif op in opcode.haslocal:
                argval = codeobj.co_varnames[oparg]
            elif op in opcode.hascompare:
                argval = opcode.cmp_op[oparg]
            elif op in opcode.hasfree:
                if free is None:
                    # Built lazily: only needed when a free/cell variable is referenced.
                    free = codeobj.co_cellvars + codeobj.co_freevars
                argval = free[oparg]

        yield (opname, argval)
Esempio n. 5
0
 def scan_opcodes(
     self, co: types.CodeType
 ) -> Generator[Union[Tuple[Literal["store"], Tuple[str]], Tuple[
         Literal["absolute_import"], Tuple[List[str], str]], Tuple[
             Literal["relative_import"], Tuple[int, List[str], str]], ],
                None, None, ]:
     """Walk the bytecode of *co* and yield the store and import events in it.

     Yields ``(STORE, (name,))`` for every opcode in ``STORE_OPS`` and, for an
     ``IMPORT_NAME`` directly preceded by two ``LOAD_CONST`` instructions,
     either ``(ABSOLUTE_IMPORT, (fromlist, name))`` when the level constant
     is 0 or ``(RELATIVE_IMPORT, (level, fromlist, name))`` otherwise.
     """
     names = co.co_names
     consts = co.co_consts
     # EXTENDED_ARG prefixes are dropped so that neighbouring entries in the
     # list are neighbouring "real" instructions.
     opargs = []
     #  pylint: disable=protected-access
     for _, op, arg in dis._unpack_opargs(co.co_code):  # type: ignore[attr-defined]
         if op != EXTENDED_ARG:
             opargs.append((op, arg))

     for idx, (op, oparg) in enumerate(opargs):
         if op in STORE_OPS:
             yield STORE, (names[oparg], )
         elif op == IMPORT_NAME and idx >= 2:
             (level_op, level_idx), (from_op, from_idx) = opargs[idx - 2:idx]
             if from_op == level_op == LOAD_CONST:
                 level = consts[level_idx]
                 fromlist = consts[from_idx]
                 if level == 0:  # absolute import
                     yield ABSOLUTE_IMPORT, (fromlist, names[oparg])
                 else:  # relative import
                     yield RELATIVE_IMPORT, (level, fromlist, names[oparg])
Esempio n. 6
0
def decode_codeobj(codeobj):
    """Decode ``codeobj.co_code`` into a stream of ``(opname, argval)`` pairs.

    Where the opcode belongs to a known category (constant, name, relative
    jump, local, comparison, free variable) ``argval`` is the resolved value.
    Otherwise it is the raw argument, or ``None`` when the instruction has no
    argument at all.
    """
    # adapted from dis.dis
    if is_py3k:
        codestr = codeobj.co_code
    else:
        codestr = [ord(ch) for ch in codeobj.co_code]
    free = None
    for i, op, oparg in _unpack_opargs(codestr):
        opname = opcode.opname[op]
        # Reset per instruction: without this, an argument-less or uncategorised
        # opcode yielded the previous instruction's value, or raised
        # UnboundLocalError if it came first.
        argval = oparg
        if oparg is not None:
            if op in opcode.hasconst:
                argval = codeobj.co_consts[oparg]
            elif op in opcode.hasname:
                argval = codeobj.co_names[oparg]
            elif op in opcode.hasjrel:
                argval = i + oparg + CODE_HAVEARG_SIZE
            elif op in opcode.haslocal:
                argval = codeobj.co_varnames[oparg]
            elif op in opcode.hascompare:
                argval = opcode.cmp_op[oparg]
            elif op in opcode.hasfree:
                if free is None:
                    # Built lazily: only needed when a free/cell variable is referenced.
                    free = codeobj.co_cellvars + codeobj.co_freevars
                argval = free[oparg]

        yield (opname, argval)
Esempio n. 7
0
def scan_opcodes(co):
    """Scan the bytecode of *co* and yield the store/import events found in it.

    Yields, in bytecode order:

    * ``("store", (name,))`` for STORE_NAME / STORE_GLOBAL,
    * ``("import from", name)`` for IMPORT_FROM,
    * ``("absolute_import", (fromlist, name))`` or
      ``("relative_import", (level, fromlist, name))`` for an IMPORT_NAME that
      is directly preceded by two LOAD_CONST instructions (level, fromlist).

    :param co: object exposing ``co_code``, ``co_names`` and ``co_consts``
    """
    import dis

    names = co.co_names
    consts = co.co_consts
    # Look the opcodes up by name instead of hard-coding their numbers, which
    # are an implementation detail of a particular CPython release.
    opmap = dis.opmap
    store_ops = (opmap["STORE_NAME"], opmap["STORE_GLOBAL"])
    import_name = opmap["IMPORT_NAME"]
    import_from = opmap["IMPORT_FROM"]
    load_const = opmap["LOAD_CONST"]

    # ``*_`` swallows the leading offset field(s) of the private helper's
    # tuples; only the trailing (op, arg) pair is needed here.
    opargs = [(op, arg) for *_, op, arg in dis._unpack_opargs(co.co_code)
              if op != dis.EXTENDED_ARG]
    for i, (op, oparg) in enumerate(opargs):
        if op in store_ops:
            yield "store", (names[oparg], )
        elif op == import_from:
            yield "import from", names[oparg]
        elif (op == import_name and i >= 2
                and opargs[i - 1][0] == opargs[i - 2][0] == load_const):
            level = consts[opargs[i - 2][1]]
            fromlist = consts[opargs[i - 1][1]]
            if level == 0:  # absolute import
                yield "absolute_import", (fromlist, names[oparg])
            else:  # relative import
                yield "relative_import", (level, fromlist, names[oparg])
Esempio n. 8
0
def iter_opcodes(code):
    """Yield ``(op, arg)`` pairs decoded from the bytecode string *code*.

    On Python 3.4 and later the decoding is delegated to
    ``dis._unpack_opargs``. On older interpreters a hand-written decoder
    (taken from the ``dis`` module) is used; it folds EXTENDED_ARG prefixes
    into the following argument and only yields opcodes that carry an
    argument.
    """
    if sys.version_info >= (3, 4):
        # Py3 has a function for this
        for _offset, op, arg in dis._unpack_opargs(code):
            yield (op, arg)
        return

    end = len(code)
    pos = 0
    pending_extension = 0
    while pos < end:
        op = ord(code[pos])
        pos += 1
        if op < dis.HAVE_ARGUMENT:
            # One-byte instruction without an argument: nothing to report.
            continue
        oparg = ord(code[pos]) + ord(code[pos + 1]) * 256 + pending_extension
        pending_extension = 0
        pos += 2
        if op == dis.EXTENDED_ARG:
            # Carry the high bits over to the next instruction's argument.
            pending_extension = oparg * long(65536)
        else:
            yield op, oparg
Esempio n. 9
0
def _update_label_offsets(code_obj, offset_of_inserted_code, size_of_inserted_code):
    """
    Update labels for the relative and absolute jump targets
    :param code_obj: code to modify
    :param offset_of_inserted_code: offset for the inserted code
    :param offset_of_inserted_code: size of the inserted code
    :return: bytes sequence with modified labels
    """
    offsets_for_modification = []
    for offset, op, arg in dis._unpack_opargs(code_obj):
        if arg is not None:
            if op in dis.hasjrel:
                # has relative jump target
                label = offset + 2 + arg
                if offset < offset_of_inserted_code < label:
                    # change labels for relative jump targets if code was inserted inside
                    offsets_for_modification.append(offset)
            elif op in dis.hasjabs:
                # change label for absolute jump if code was inserted before it
                if offset_of_inserted_code <= arg:
                    offsets_for_modification.append(offset)
    code_list = list(code_obj)
    for i in range(0, len(code_obj), 2):
        op = code_list[i]
        if i in offsets_for_modification and op >= dis.HAVE_ARGUMENT:
            code_list[i + 1] += size_of_inserted_code
    return bytes(code_list)
Esempio n. 10
0
    def compare_bytes_sequence(self, code1, code2, inserted_code_size):
        """
        Compare code after modification and the real code
        Since we add POP_JUMP_IF_TRUE instruction, we can't compare modified code and the real code. That's why we
        allow some inaccuracies while code comparison
        :param code1: result code after modification (object with a ``co_code`` attribute)
        :param code2: a real code for checking (object with a ``co_code`` attribute)
        :param inserted_code_size: size of inserted code
        :raises AssertionError: if the sequences differ in length, opcodes or (non-tolerated) arguments
        """
        seq1 = list(dis._unpack_opargs(list(code1.co_code)))
        seq2 = list(dis._unpack_opargs(list(code2.co_code)))
        # TestCase assertion instead of a bare ``assert`` so the check is not stripped under ``python -O``.
        self.assertEqual(len(seq1), len(seq2), "Bytes sequences have different lengths")
        for (of, op1, arg1), (_, op2, arg2) in zip(seq1, seq2):
            if op1 != op2:
                if op1 == 115 and op2 == 1:
                    # it's ok, because we added POP_JUMP_IF_TRUE manually, but it's POP_TOP in the real code
                    # inserted code - 2 (removed return instruction) - real code inserted
                    # Jump should be done to the beginning of inserted fragment
                    expected_target = of - (inserted_code_size - 2)
                    if IS_PY310_OR_GREATER:
                        # the expected argument is halved on this branch
                        expected_target //= 2
                    self.assertEqual(arg1, expected_target)
                    continue
                if op1 == EXTENDED_ARG and op2 == 12:
                    # we added a real UNARY_NOT to balance EXTENDED_ARG added by new jump instruction
                    # i.e. inserted code size was increased as well
                    inserted_code_size += 2
                    continue

            self.assertEqual(op1, op2,
                             "Different operators at offset {}".format(of))

            if op1 in LOAD_OPCODES:
                # When comparing arguments of the load operations we shouldn't rely only on arguments themselves,
                # because their order may change. It's better to compare the actual values instead.
                self.compare_load_args(of, code1, code2, op1, arg1, arg2)
            elif arg1 != arg2:
                # ``assertEquals`` was a deprecated alias and no longer exists in Python 3.12+.
                self.assertEqual(
                    arg1, arg2, "Different arguments at offset {}".format(of))
Esempio n. 11
0
    def _ScanCode(self, co, module: "Module", deferredImports, topLevel=True):
        """Walk the bytecode of ``co`` and record the imports and global names it creates.

        Constants loaded immediately before an IMPORT_NAME are remembered so
        the relative-import level and from-list can be recovered. Imported
        modules are resolved through ``self._ImportModule``; for top-level
        code, stored names and ``import *`` names are added to
        ``module.global_names``. Nested code objects found in ``co.co_consts``
        are scanned recursively with ``topLevel=False``.
        """
        pendingConsts = []
        importedModule = None
        for _offset, op, opArg in dis._unpack_opargs(co.co_code):

            if op == LOAD_CONST:
                # constants feed a following import; keep accumulating them
                # and skip the reset at the bottom of the loop
                pendingConsts.append(co.co_consts[opArg])
                continue

            if op == IMPORT_NAME:
                # import statement: attempt to import module
                name = co.co_names[opArg]
                if len(pendingConsts) >= 2:
                    relativeImportIndex, fromList = pendingConsts[-2:]
                else:
                    relativeImportIndex = -1
                    fromList = pendingConsts[0] if pendingConsts else []
                if name not in module.exclude_names:
                    importedModule = self._ImportModule(
                        name, deferredImports, module, relativeImportIndex
                    )
                    needsFromList = (
                        importedModule is not None
                        and fromList
                        and fromList != ("*",)
                        and importedModule.path is not None
                    )
                    if needsFromList:
                        self._EnsureFromList(
                            module, importedModule, fromList, deferredImports
                        )
            elif op == IMPORT_STAR and topLevel and importedModule is not None:
                # import * statement: copy all global names
                module.global_names.update(importedModule.global_names)
            elif topLevel and op in STORE_OPS:
                # store operation: track only top level
                module.global_names.add(co.co_names[opArg])

            # any instruction other than LOAD_CONST ends the run of constants
            # that could belong to an import statement
            pendingConsts = []

        # Scan the code objects from function & class definitions
        codeType = type(co)
        for constant in co.co_consts:
            if isinstance(constant, codeType):
                self._ScanCode(
                    constant, module, deferredImports, topLevel=False
                )
Esempio n. 12
0
def _update_label_offsets(code_obj, breakpoint_offset, breakpoint_code_list):
    """
    Insert a piece of code into the bytecode and update labels for the relative and absolute jump targets.

    Insertions are processed from a work list. If patching a jump pushes its argument above ``MAX_BYTE``,
    an ``EXTENDED_ARG`` instruction carrying the high bits is queued as a further insertion right before
    that jump, and it is handled by a later pass of the same loop.

    :param code_obj: code to modify
    :param breakpoint_offset: offset for the inserted code
    :param breakpoint_code_list: the code to insert, as a list of bytecode values (its length is the inserted size)
    :return: bytes sequence with modified labels and the inserted code spliced in; list of tuples
    (resulting offset, list of code instructions) with information about all inserted pieces of code
    """
    inserted_code = list()
    # the list with all inserted pieces of code
    inserted_code.append((breakpoint_offset, breakpoint_code_list))
    code_list = list(code_obj)
    j = 0

    # ``inserted_code`` doubles as a work queue: entries may be appended while it is being walked
    while j < len(inserted_code):
        current_offset, current_code_list = inserted_code[j]
        offsets_for_modification = []

        # Pass 1: collect the offsets of the jumps whose argument must grow because of this insertion
        for offset, op, arg in dis._unpack_opargs(code_list):
            if arg is not None:
                if op in dis.hasjrel:
                    # has relative jump target
                    label = offset + 2 + arg
                    if offset < current_offset < label:
                        # change labels for relative jump targets if code was inserted inside
                        offsets_for_modification.append(offset)
                elif op in dis.hasjabs:
                    # change label for absolute jump if code was inserted before it
                    if current_offset < arg:
                        offsets_for_modification.append(offset)
        # Pass 2: patch the low argument byte of each collected jump
        for i in range(0, len(code_list), 2):
            op = code_list[i]
            if i in offsets_for_modification and op >= dis.HAVE_ARGUMENT:
                new_arg = code_list[i + 1] + len(current_code_list)
                if new_arg <= MAX_BYTE:
                    code_list[i + 1] = new_arg
                else:
                    # if new argument > 255 we need to insert the new operator EXTENDED_ARG
                    extended_arg_code = [EXTENDED_ARG, new_arg >> 8]
                    code_list[i + 1] = new_arg & MAX_BYTE
                    inserted_code.append((i, extended_arg_code))

        # Splice the current piece into the bytecode
        code_list = code_list[:current_offset] + current_code_list + code_list[current_offset:]

        # Every recorded insertion located after the current one has moved by the size just inserted
        for k in range(len(inserted_code)):
            offset, inserted_code_list = inserted_code[k]
            if current_offset < offset:
                inserted_code[k] = (offset + len(current_code_list), inserted_code_list)
        j += 1

    return bytes(code_list), inserted_code
Esempio n. 13
0
def _disassemble(
        code: types.CodeType) -> Tuple[List[_Instruction], _JumpTable]:
    """
    Turn `code.co_code` into a list of _Instruction objects (EXTENDED_ARG entries are skipped,
    each instruction is tagged with the line number starting at its offset, if any) and build
    the _JumpTable for that list.
    """
    line_by_offset = dict(dis.findlinestarts(code))
    instrs = []
    for offset, op, arg in dis._unpack_opargs(code.co_code):
        if op != EXTENDED_ARG:
            instr = _Instruction(op, arg, offset)
            # None when no source line starts at this instruction
            instr.lineno = line_by_offset.get(instr.offset)
            instrs.append(instr)

    return instrs, _JumpTable(instrs)
Esempio n. 14
0
 def scan_opcodes(self, co):
     """Yield the store and import events found in the bytecode of *co*.

     Produces ``('store', (name,))`` for opcodes in ``STORE_OPS`` and, for an
     ``IMPORT_NAME`` directly preceded by two ``LOAD_CONST`` instructions,
     ``('absolute_import', (fromlist, name))`` when the level constant is 0
     or ``('relative_import', (level, fromlist, name))`` otherwise.
     """
     code = co.co_code
     names = co.co_names
     consts = co.co_consts
     # Drop EXTENDED_ARG prefixes so list neighbours are real instructions.
     opargs = []
     for _, op, arg in dis._unpack_opargs(code):
         if op != EXTENDED_ARG:
             opargs.append((op, arg))
     for pos, (op, oparg) in enumerate(opargs):
         if op in STORE_OPS:
             yield 'store', (names[oparg], )
         elif (op == IMPORT_NAME and pos >= 2
               and opargs[pos - 1][0] == opargs[pos - 2][0] == LOAD_CONST):
             level = consts[opargs[pos - 2][1]]
             fromlist = consts[opargs[pos - 1][1]]
             if level == 0:
                 yield 'absolute_import', (fromlist, names[oparg])
             else:
                 yield 'relative_import', (level, fromlist, names[oparg])
 def scan_opcodes(self, co):
     """Scan the code, and yield 'interesting' opcode combinations.

     A store opcode yields ``("store", (name,))``. An ``IMPORT_NAME`` whose
     two preceding instructions are both ``LOAD_CONST`` yields an
     ``"absolute_import"`` or ``"relative_import"`` event, depending on
     whether the level constant is 0.
     """
     code = co.co_code
     names = co.co_names
     consts = co.co_consts
     opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
               if op != EXTENDED_ARG]
     for index, (op, oparg) in enumerate(opargs):
         if op in STORE_OPS:
             yield "store", (names[oparg], )
             continue
         # Anything that is not an import with two predecessors is ignored.
         if op != IMPORT_NAME or index < 2:
             continue
         if not (opargs[index - 1][0] == opargs[index - 2][0] == LOAD_CONST):
             continue
         level = consts[opargs[index - 2][1]]
         fromlist = consts[opargs[index - 1][1]]
         if level == 0:  # absolute import
             yield "absolute_import", (fromlist, names[oparg])
         else:  # relative import
             yield "relative_import", (level, fromlist, names[oparg])
Esempio n. 16
0
 def scan_opcodes(self, co):
     """Scan the code, and yield 'interesting' opcode combinations.

     Events are ``("store", (name,))`` for opcodes in ``STORE_OPS``, and
     ``("absolute_import", (fromlist, name))`` or
     ``("relative_import", (level, fromlist, name))`` for an ``IMPORT_NAME``
     preceded by the two ``LOAD_CONST`` instructions that load the level and
     the from-list.
     """
     code = co.co_code
     names = co.co_names
     consts = co.co_consts
     decoded = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
                if op != EXTENDED_ARG]
     for i, (op, oparg) in enumerate(decoded):
         if op in STORE_OPS:
             yield "store", (names[oparg],)
             continue
         if op == IMPORT_NAME and i >= 2:
             level_op, level_arg = decoded[i-2]
             from_op, from_arg = decoded[i-1]
             if from_op == level_op == LOAD_CONST:
                 level = consts[level_arg]
                 fromlist = consts[from_arg]
                 if level == 0: # absolute import
                     yield "absolute_import", (fromlist, names[oparg])
                 else: # relative import
                     yield "relative_import", (level, fromlist, names[oparg])
Esempio n. 17
0
def _disassemble_lambda(co):
    """Return ``[[opname, operands], ...]`` for the bytecode of *co*.

    ``operands`` holds at most one entry: the referenced constant, name,
    local variable name or comparison operator, or the raw argument for
    opcodes recognised by ``is_func`` / ``is_func_kw``.

    :raises ValueError: if a relative-jump opcode carrying an argument is met
    """
    def _operands(op, arg):
        # Resolve the argument of a single instruction into its operand list.
        if arg is None:
            return []
        if op in hasconst:
            return [co.co_consts[arg]]  # constant pool entry
        if op in hasname:
            return [co.co_names[arg]]  # entry of co_names
        if op in hasjrel:
            raise ValueError("unimpl: op in hasjrel")
        if op in haslocal:
            return [co.co_varnames[arg]]  # local variable name
        if op in hascompare:
            return [cmp_op[arg]]  # comparison operator
        if is_func(opname[op]) or is_func_kw(opname[op]):
            return [arg]  # oparg == nargs(fcn)
        return []

    return [[opname[op], _operands(op, arg)]
            for _offset, op, arg in _unpack_opargs(co.co_code)]
Esempio n. 18
0
def _get_instructions_bytes(code, _file, _line_offset=0):
    for offset, op, arg in dis._unpack_opargs(code.co_code):
        argval = None
        argrepr = ''
        if arg is not None:
            argval = arg
            if op in dis.hasconst:
                argval, argrepr = dis._get_const_info(arg, code.co_consts)
            elif op in dis.hasname:
                argval, argrepr = dis._get_name_info(arg, code.co_names)
            elif op in dis.hasjrel:
                argval = offset + 2 + arg
                argrepr = "to " + repr(argval)
            elif op in dis.haslocal:
                argval, argrepr = dis._get_name_info(arg, code.co_varnames)
            elif op in dis.hascompare:
                argval = dis.cmp_op[arg]
                argrepr = argval
            elif op in dis.hasfree:
                argval, argrepr = dis._get_name_info(
                    arg, code.co_cellvars + code.co_freevars)
            elif op == dis.FORMAT_VALUE:
                argval = ((None, str, repr, ascii)[arg & 0x3], bool(arg & 0x4))
                argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3]
                if argval[1]:
                    if argrepr:
                        argrepr += ', '
                    argrepr += 'with format'
        disasm_line = ' ' * _line_offset
        disasm_line += repr(offset).ljust(6)
        disasm_line += dis.opname[op].ljust(21)
        if arg is not None:
            disasm_line += repr(arg).ljust(6)
            if argrepr:
                disasm_line += argrepr
        print(disasm_line, file=_file)
        if (repr(type(argval))[8:-2] == 'code'):
            _get_instructions_bytes(argval, _file, _line_offset + 4)
Esempio n. 19
0
def _add_attr_values_from_insert_to_original(original_code, insert_code, insert_code_obj, attribute_name, op_list):
    """
    This function appends values of the attribute `attribute_name` of the inserted code to the original values,
     and changes indexes inside inserted code. If some bytecode instruction in the inserted code used to call argument
     number i, after modification it calls argument n + i, where n - length of the values in the original code.
     So it helps to avoid variables mixing between two pieces of code.

    :param original_code: code to modify
    :param insert_code: code to insert
    :param insert_code_obj: bytes sequence of inserted code, which should be modified too
    :param attribute_name: name of attribute to modify ('co_names', 'co_consts' or 'co_varnames')
    :param op_list: sequence of bytecodes whose arguments should be changed
    :return: modified bytes sequence of the code to insert and new values of the attribute `attribute_name` for original code
    """
    orig_value = getattr(original_code, attribute_name)
    insert_value = getattr(insert_code, attribute_name)
    orig_names_len = len(orig_value)
    code_with_new_values = list(insert_code_obj)
    for offset, op, arg in dis._unpack_opargs(insert_code_obj):
        if op in op_list:
            code_with_new_values[offset + 1] += orig_names_len
    new_values = orig_value + insert_value
    return bytes(code_with_new_values), new_values
Esempio n. 20
0
def _get_instructions_bytes(
        code,
        varnames=None,
        names=None,
        constants=None,
        cells=None,  # e.g. code.co_cellvars + code.co_freevars
        linestarts=None,
        line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    :param code: bytecode string to decode
    :param varnames: local variable names used to resolve local-variable arguments
    :param names: names used to resolve name arguments
    :param constants: constants used to resolve constant arguments
    :param cells: cell/free variable names used to resolve free-variable arguments
    :param linestarts: optional mapping ``offset -> line number``; when ``None``
        no line information is reported (``starts_line`` stays ``None``)
    :param line_offset: value added to every line number taken from ``linestarts``
    """
    # NOTE: an earlier revision did ``if not linestarts: linestarts = linestarts(code)``,
    # i.e. it called the (falsy) argument itself and therefore always raised
    # TypeError when ``linestarts`` was omitted. A missing mapping now simply
    # means "no line information", which is what the ``is not None`` guard
    # below already caters for.
    labels = findlabels(code)
    starts_line = None
    for offset, op, arg in dis._unpack_opargs(code):
        if linestarts is not None:
            starts_line = linestarts.get(offset, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = offset in labels
        argval = None
        argrepr = ''
        if arg is not None:
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg

            if op in dis.hasconst:
                argval, argrepr = dis._get_const_info(arg, constants)
            elif op in dis.hasname:
                argval, argrepr = dis._get_name_info(arg, names)
            elif op in dis.hasjrel:
                argval = offset + 2 + arg
                argrepr = "to " + repr(argval)
            # Plain membership tests: the previous code rebuilt a throwaway
            # dict keyed by these opcodes on every instruction just to do this.
            elif op in dis.haslocal:
                argval, argrepr = dis._get_name_info(arg, varnames)
            elif op in dis.hascompare:
                argval = dis.cmp_op[arg]
                argrepr = argval
            elif op in dis.hasfree:
                argval, argrepr = dis._get_name_info(arg, cells)
            elif op == dis.FORMAT_VALUE:
                argval = ((None, str, repr, ascii)[arg & 0x3], bool(arg & 0x4))
                argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3]
                if argval[1]:
                    if argrepr:
                        argrepr += ', '
                    argrepr += 'with format'
        yield Instruction(opname[op], op, arg, argval, argrepr, offset,
                          starts_line, is_jump_target)
Esempio n. 21
0
def _find_lambdex_blocks(code: types.CodeType) -> Sequence[_LambdexBlock]:
    """
    Find all lambdex block in `code.co_code`.

    This is a generator: each `_LambdexBlock` is yielded as soon as the call that
    finishes it is reached, so nested blocks are produced before the blocks
    enclosing them.  No particular ordering should be assumed (so that you may
    sort by yourself).

    The bytecode is walked once while a single integer emulates the evaluation
    stack depth; a block starts at a LOAD_GLOBAL / LOAD_NAME of a declarer name
    and ends at the CALL_FUNCTION / CALL_METHOD that brings the stack back to
    one item above the depth recorded at the block start.
    """
    # Storing them locally for faster accessing
    names = code.co_names
    consts = code.co_consts
    freevars = code.co_freevars
    cellvars = code.co_cellvars
    closures = cellvars + freevars

    # Variables related to offset-lineno lookup
    linestarts = list(dis.findlinestarts(code))
    i_linestarts = -1
    n_linestarts = len(linestarts)
    lineno = -1  # The current line number

    # Variables related to currently processing blocks
    # NOTE that blocks may be cascaded (e.g., another lambdex being the default arg value
    # of one lambdex), so we need a stack-like structure
    blocks = []
    curr_block = None

    stack_depth = 0  # A single integer emulating stack evolution
    prev_op = None  # The previous op

    for offset, op, arg in dis._unpack_opargs(code.co_code):
        # Update lineno if necessary
        if (n_linestarts - 1 > i_linestarts
                and linestarts[i_linestarts + 1][0] <= offset):
            i_linestarts += 1
            lineno = linestarts[i_linestarts][1]

        # If matched a LOAD_GLOBAL / LOAD_NAME def_, start a new block
        if op in {LOAD_GLOBAL, LOAD_NAME} and names[arg] in get_declarers():
            curr_block = _LambdexBlock()
            curr_block.keyword = names[arg]
            curr_block.lineno = lineno
            curr_block.offset_start = offset
            curr_block.stack_depth = stack_depth
            curr_block.offset_start_make_lambda = offset
            blocks.append(curr_block)

        # A jump op may be encountered when building default arg values, e.g.,
        # `and`, `or` or `...if...else...` expressions.  We won't perform any
        # updates on the current block, but only discover new blocks before
        # reaching the jump target.  Before this time, the stack may be messed
        # up, so we restore the `stack_depth` after jumping.

        # We record metadata of jumping only if
        #  1) some blocks are being processed;
        #  2) the currently processed block is not jumping.
        if not blocks or curr_block.offset_jump is not None:
            pass
        elif op in HASJABS:
            effect = JABS_STACK_EFFECT_AFTER_JUMP[op]
            curr_block.offset_jump = arg
            curr_block.stack_depth_after_jump = stack_depth + effect
        elif op in HASJREL:
            effect = JREL_STACK_EFFECT_AFTER_JUMP[op]
            # Relative target: argument counted from the end of this instruction
            curr_block.offset_jump = arg + offset + 2
            curr_block.stack_depth_after_jump = stack_depth + effect

        # If reaching a jump target, we restore the stack depth
        if blocks and curr_block.offset_jump == offset:
            curr_block.offset_jump = None
            stack_depth = curr_block.stack_depth_after_jump

        # Update the stack depth as if (op, arg) is performed
        if (
                op != EXTENDED_ARG
        ):  # In Python <= 3.7, EXTENDED_ARG as argument will cause ValueError
            stack_depth += stack_effect(op, arg)

        # In the following branches, we update the current block and decide whether
        # the block is finished or broken
        if not blocks or curr_block.offset_jump is not None:
            pass
        elif curr_block.stack_depth >= stack_depth:
            # If the function `def_` or `def_.<ident>` popped unexpectedly,
            # we consider the current block as broken
            blocks.pop()
            if blocks:
                curr_block = blocks[-1]
        elif op == LOAD_METHOD and offset == curr_block.offset_start + 2:
            # If LOAD_METHOD met just after offset_start, record the name as identifier
            curr_block.identifier = names[arg]
            curr_block.offset_start_make_lambda = offset
        elif op == LOAD_CONST and iscode(consts[arg]):
            # If loading a code object, store it in `.lambda_code` (so that the last one preserved)
            curr_block.lambda_code = consts[arg]
            curr_block.code_const_idx = arg
        elif op == LOAD_CLOSURE:
            # If LOAD_CLOSURE met, record the arg as a freevar
            curr_block.freevars.append(closures[arg])
            curr_block.freevar_opargs.append(arg)
        elif prev_op == LOAD_CLOSURE and op == BUILD_TUPLE:
            # If making closure tuple, record the offset
            curr_block.offset_end_make_closure_tuple = offset
        elif op == MAKE_FUNCTION:
            # If MAKE_FUNCTION met, record its argument (so that the last one preserved)
            curr_block.make_function_mode = arg
        elif (op in {CALL_FUNCTION, CALL_METHOD}
              and stack_depth == curr_block.stack_depth + 1):
            # If CALL_FUNCTION / CALL_METHOD met and the stack is balanced, finish the current block
            curr_block.offset_end = offset
            yield blocks.pop()
            # Resume the enclosing block, if any
            if blocks:
                curr_block = blocks[-1]

        prev_op = op
Esempio n. 22
0
def _scancode(co):
    import opcode
    arguments = []
    args = []
    opmap = opcode.opmap.copy()
    importedModule = None
    topLevel = True
    STORE_OPS = (90, 97)

    for opindex, op, oparg in dis._unpack_opargs(co.co_code):
        opname = opcode.opname[op]
        print(opindex, op, opname, oparg)

        if op >= dis.HAVE_ARGUMENT:
            if op in dis.hasconst:
                arg = [code.co_consts[oparg]]
                print('hasconst')
            elif op in dis.hasname:
                arg = [code.co_names[oparg]]
                print('hasname')
            elif op in dis.hasjrel:
                arg = [i + oparg]
                print('hasjrel')
            elif op in dis.haslocal:
                arg = [code.co_varnames[oparg]]
                print('haslocal')
            elif op in dis.hascompare:
                arg = [dis.cmp_op[oparg]]
                print(op, 'hascompare')
            elif op in dis.hasfree:
                arg = [free[oparg]]
                print(op, 'hasfree')
            else:
                arg = [oparg]
            args.append(arg)
            print('arg', arg)

        if op == opmap.get('LOAD_CONST'):
            arguments.append(co.co_consts[oparg])
            print('LOAD_CONST -> {}'.format(co.co_consts[oparg]))
            continue

        elif op == opmap.get('IMPORT_NAME'):
            name = co.co_names[oparg]
            print('IMPORT_NAME ->' + name)
            if len(arguments) >= 2:
                relativeImportIndex, fromList = arguments[-2:]
            else:
                relativeImportIndex = -1
                #fromList, = arguments
                fromList = arguments[0] if arguments else []

        elif op == opmap.get(
                'IMPORT_STAR') and topLevel and importedModule is not None:
            pass

        elif topLevel and op in STORE_OPS:
            name = co.co_names[oparg]
            arguments.append(name)

    return arguments
Esempio n. 23
0
 def update_event(self, inp=-1):
     """Decode the bytecode received on input 0 and publish the result on output 0.

     The value written is whatever ``dis._unpack_opargs`` returns for that
     input; ``inp`` is not used.
     """
     bytecode = self.input(0)
     unpacked = dis._unpack_opargs(bytecode)
     self.set_output_val(0, unpacked)