Exemple #1
0
    def disassemble(self, co, classname=None):
        """
        Disassemble a code object, returning a list of 'Token'.

        The main part of this procedure is modelled after
        dis.disassemble().

        The bytecode of ``co`` is copied into ``self.code`` and rewritten
        (``restructRelativeJump`` is applied, then every offset reported by
        ``getOpcodeToDel`` is removed) before it is turned into tokens.

        Parameters:
            co: the code object to disassemble.
            classname: optional name of the enclosing class.  When given,
                names carrying the ``_<classname>__`` prefix have the
                ``_<classname>`` part stripped.

        Returns:
            A ``(tokens, customize)`` tuple.  ``tokens`` is the list of
            Token instances; ``customize`` maps each opcode name that was
            suffixed with its argument (e.g. ``CALL_FUNCTION_2``) to that
            argument.

        Raises:
            NotImplementedError: if the bytecode contains EXTENDED_ARG.

        Side effects: sets ``self.code``, ``self.linestarts``,
        ``self.prev``, ``self.names``, ``self.toChange`` and ``self.lines``.
        """
        rv = []
        customize = {}
        Token = self.Token  # shortcut
        self.code = array("B", co.co_code)
        n = len(self.code)
        # linestarts holds the (bytecode offset, line number) pairs
        self.linestarts = list(dis.findlinestarts(co))
        self.prev = [0]
        # change jump struct
        self.restructRelativeJump()

        # class and names
        if classname:
            classname = "_" + classname.lstrip("_") + "__"

            def unmangle(name):
                # "_Cls__attr" -> "__attr"; names ending in "__" are kept.
                if name.startswith(classname) and name[-2:] != "__":
                    return name[len(classname) - 2 :]
                return name

            free = [unmangle(name) for name in (co.co_cellvars + co.co_freevars)]
            names = [unmangle(name) for name in co.co_names]
            varnames = [unmangle(name) for name in co.co_varnames]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        self.names = names
        # offsets of the instructions to remove
        toDel = []
        # offsets of the instructions to change
        self.toChange = []
        for i in self.op_range(0, n):
            ret = self.getOpcodeToDel(i)
            if ret is not None:
                toDel += ret
        if toDel:  # ugly, to be reviewed / rethought (do it all at once? tricky)
            # 'delta' counts the bytes already removed, so that the original
            # offsets in toDel can be mapped onto the shrinking array.
            delta = 0
            for x in sorted(set(toDel)):
                pos = x - delta
                # an instruction with an argument occupies 3 bytes, else 1
                size = 3 if self.code[pos] >= dis.HAVE_ARGUMENT else 1
                for _ in range(size):
                    self.code.pop(pos)
                    self.restructCode(pos)
                delta += size

        # self.prev[k] is the offset of the instruction that covers byte
        # k - 1, i.e. for an instruction start k, the previous instruction.
        n = len(self.code)
        for i in self.op_range(0, n):
            op = self.code[i]
            self.prev.append(i)
            if op >= HAVE_ARGUMENT:
                self.prev.append(i)
                self.prev.append(i)

        # self.lines[k] = (line number of byte k, offset where the next line
        # starts, or the code length for the last line)
        j = 0
        linestarts = self.linestarts
        self.lines = []
        linetuple = namedtuple("linetuple", ["l_no", "next"])
        linestartoffsets = {a for (a, _) in linestarts}
        prev_line_no = linestarts[0][1]
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            prev_line_no = line_no
        while j < n:
            self.lines.append(linetuple(prev_line_no, n))
            j += 1

        # cf maps an offset to the list of offsets recorded as coming to it.
        # NOTE(review): self.next_stmt, self.stmts, self.not_continue and
        # self.return_end_ifs are read below but never set in this method;
        # presumably find_jump_targets populates them - confirm.
        cf = self.find_jump_targets(self.code)

        # Mark PRINT_ITEM / PRINT_NEWLINE statements that follow a
        # PRINT_ITEM statement on the same source line.
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        while i < n - 1:
            if self.lines[last_stmt].next > i:
                if self.code[last_stmt] == PRINT_ITEM:
                    if self.code[i] == PRINT_ITEM:
                        replace[i] = "PRINT_ITEM_CONT"
                    elif self.code[i] == PRINT_NEWLINE:
                        replace[i] = "PRINT_NEWLINE_CONT"
            last_stmt = i
            i = self.next_stmt[i]

        # Mark an IMPORT_NAME that follows another IMPORT_NAME on the same
        # source line.
        imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = "IMPORT_NAME_CONT"
                last_import = i

        for offset in self.op_range(0, n):
            op = self.code[offset]
            opname = dis.opname[op]
            oparg = None
            pattr = None

            if offset in cf:
                for k, source in enumerate(cf[offset]):
                    rv.append(Token("COME_FROM", None, repr(source), offset="%s_%d" % (offset, k)))
            if op >= HAVE_ARGUMENT:
                oparg = self.get_argument(offset)
                if op == dis.EXTENDED_ARG:
                    # TODO: supporting this means carrying oparg * 65536
                    # over into the argument of the following instruction.
                    raise NotImplementedError("EXTENDED_ARG is not supported")
                if op in dis.hasconst:
                    const = co.co_consts[oparg]
                    if isinstance(const, types.CodeType):
                        oparg = const
                        if const.co_name == "<lambda>":
                            assert opname == "LOAD_CONST"
                            opname = "LOAD_LAMBDA"
                        elif const.co_name == "<genexpr>":
                            opname = "LOAD_GENEXPR"
                        elif const.co_name == "<dictcomp>":
                            opname = "LOAD_DICTCOMP"
                        elif const.co_name == "<setcomp>":
                            opname = "LOAD_SETCOMP"
                        # verify uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        pattr = "<code_object " + const.co_name + ">"
                    else:
                        pattr = const
                elif op in dis.hasname:
                    pattr = names[oparg]
                elif op in dis.hasjrel:
                    # relative jumps count from the next instruction
                    pattr = repr(offset + 3 + oparg)
                elif op in dis.hasjabs:
                    pattr = repr(oparg)
                elif op in dis.haslocal:
                    pattr = varnames[oparg]
                elif op in dis.hascompare:
                    pattr = dis.cmp_op[oparg]
                elif op in dis.hasfree:
                    pattr = free[oparg]
            if offset in self.toChange:
                # a flagged JA whose target is WITH_CLEANUP is reported as
                # SETUP_WITH, and the target gets a COME_FROM entry for it
                if self.code[offset] == JA and self.code[oparg] == WITH_CLEANUP:
                    opname = "SETUP_WITH"
                    cf[oparg] = cf.get(oparg, []) + [offset]
            if op in (
                BUILD_LIST,
                BUILD_TUPLE,
                BUILD_SLICE,
                UNPACK_SEQUENCE,
                MAKE_FUNCTION,
                CALL_FUNCTION,
                MAKE_CLOSURE,
                CALL_FUNCTION_VAR,
                CALL_FUNCTION_KW,
                CALL_FUNCTION_VAR_KW,
                DUP_TOPX,
            ):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                # Look up the real previous instruction through self.prev:
                # 'offset - 3' could hit an argument byte or a negative index.
                if op == BUILD_TUPLE and self.code[self.prev[offset]] == LOAD_CLOSURE:
                    continue
                else:
                    opname = "%s_%d" % (opname, oparg)
                    if op != BUILD_SLICE:
                        customize[opname] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    # backward absolute jump: a 'continue' or a loop jump
                    if (
                        offset in self.stmts
                        and self.code[offset + 3] not in (END_FINALLY, POP_BLOCK)
                        and offset not in self.not_continue
                    ):
                        opname = "CONTINUE"
                    else:
                        opname = "JUMP_BACK"

            elif op == LOAD_GLOBAL:
                # NOTE(review): the AttributeError guard presumably covers
                # the Token-to-string comparison - confirm against Token.
                try:
                    if pattr == "AssertionError" and rv and rv[-1] == "JUMP_IF_TRUE":
                        opname = "LOAD_ASSERT"
                except AttributeError:
                    pass
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    opname = "RETURN_END_IF"

            rv.append(
                Token(replace.get(offset, opname), oparg, pattr, offset, linestart=offset in linestartoffsets)
            )

        if self.showasm:
            out = self.out  # shortcut
            for t in rv:
                print >> out, t
            print >> out
        return rv, customize
Exemple #2
0
    def disassemble(self, co, classname=None):
        '''
        Disassemble a code object, returning a list of 'Token'.

        The main part of this procedure is modelled after
        dis.disassemble().

        The bytecode of ``co`` is copied into ``self.code``, cut after its
        last RETURN_VALUE / END_FINALLY instruction and passed through
        ``restructBytecode`` before it is turned into tokens.

        Parameters:
            co: the code object to disassemble.
            classname: optional name of the enclosing class.  When given,
                names carrying the ``_<classname>__`` prefix have the
                ``_<classname>`` part stripped.

        Returns:
            A ``(tokens, customize)`` tuple.  ``tokens`` is the list of
            Token instances; ``customize`` maps each opcode name that was
            suffixed with its argument (e.g. ``CALL_FUNCTION_2``) to that
            argument.

        Raises:
            NotImplementedError: if the bytecode contains EXTENDED_ARG.

        Side effects: sets ``self.code``, ``self.linestarts``,
        ``self.prev``, ``self.names``, ``self.toChange``, ``self.lines``
        and ``self.load_asserts``.
        '''

        rv = []
        customize = {}
        Token = self.Token # shortcut
        self.code = array('B', co.co_code)
        # Keep everything up to and including the last RETURN_VALUE or
        # END_FINALLY; fall back to the whole code when neither occurs
        # (previously 'n' was left unbound in that case).
        n = len(self.code)
        for i in self.op_range(0, len(self.code)):
            if self.code[i] in (RETURN_VALUE, END_FINALLY):
                n = i + 1
        self.code = array('B', co.co_code[:n])
        # linestarts holds the (bytecode offset, line number) pairs
        self.linestarts = list(dis.findlinestarts(co))
        self.prev = [0]
        # class and names
        if classname:
            classname = '_' + classname.lstrip('_') + '__'
            def unmangle(name):
                # '_Cls__attr' -> '__attr'; names ending in '__' are kept.
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name

            free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
            names = [ unmangle(name) for name in co.co_names ]
            varnames = [ unmangle(name) for name in co.co_varnames ]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        self.names = names

        # list of instruction to remove/add or change to match with bytecode 2.7
        self.toChange = []
        self.restructBytecode()
        # measured after restructBytecode() - presumably it rewrites
        # self.code in place (not visible here)
        codelen = len(self.code)
        # self.prev[k] is the offset of the instruction that covers byte
        # k - 1, i.e. for an instruction start k, the previous instruction.
        for i in self.op_range(0, codelen):
            op = self.code[i]
            self.prev.append(i)
            if self.op_hasArgument(op):
                self.prev.append(i)
                self.prev.append(i)

        # self.lines[k] = (line number of byte k, offset where the next line
        # starts, or the code length for the last line)
        j = 0
        linestarts = self.linestarts
        self.lines = []
        linetuple = namedtuple('linetuple', ['l_no', 'next'])
        linestartoffsets = {a for (a, _) in linestarts}
        prev_line_no = linestarts[0][1]
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            prev_line_no = line_no
        while j < codelen:
            self.lines.append(linetuple(prev_line_no, codelen))
            j += 1

        # Offsets of each LOAD_GLOBAL of 'AssertionError' located 3 bytes
        # after a PJIT instruction; reported as LOAD_ASSERT below.
        self.load_asserts = set()
        for i in self.op_range(0, codelen):
            if self.code[i] == PJIT and self.code[i+3] == LOAD_GLOBAL:
                if names[self.get_argument(i+3)] == 'AssertionError':
                    self.load_asserts.add(i+3)

        # cf maps an offset to the list of offsets recorded as coming to it.
        # NOTE(review): self.next_stmt, self.stmts, self.not_continue and
        # self.return_end_ifs are read below but never set in this method;
        # presumably find_jump_targets populates them - confirm.
        cf = self.find_jump_targets(self.code)

        # Mark PRINT_ITEM / PRINT_NEWLINE statements that follow a
        # PRINT_ITEM statement on the same source line.
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        while i < codelen-1:
            if self.lines[last_stmt].next > i:
                if self.code[last_stmt] == PRINT_ITEM:
                    if self.code[i] == PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif self.code[i] == PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]

        # Mark an IMPORT_NAME that follows another IMPORT_NAME on the same
        # source line.
        imports = self.all_instr(0, codelen, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i

        for offset in self.op_range(0, codelen):
            op = self.code[offset]
            op_name = opname[op]
            oparg = None
            pattr = None

            if offset in cf:
                for k, source in enumerate(cf[offset]):
                    rv.append(Token('COME_FROM', None, repr(source),
                                    offset="%s_%d" % (offset, k) ))
            if self.op_hasArgument(op):
                oparg = self.get_argument(offset)
                if op == EXTENDED_ARG:
                    # TODO: supporting this means carrying oparg * 65536
                    # over into the argument of the following instruction.
                    raise NotImplementedError('EXTENDED_ARG is not supported')
                if op in hasconst:
                    const = co.co_consts[oparg]
                    if isinstance(const, types.CodeType):
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert op_name == 'LOAD_CONST'
                            op_name = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            op_name = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            op_name = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            op_name = 'LOAD_SETCOMP'
                        # verify uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in hasname:
                    pattr = names[oparg]
                elif op in hasjrel:
                    # relative jumps count from the next instruction
                    pattr = repr(offset + 3 + oparg)
                elif op in hasjabs:
                    pattr = repr(oparg)
                elif op in haslocal:
                    pattr = varnames[oparg]
                elif op in hascompare:
                    pattr = cmp_op[oparg]
                elif op in hasfree:
                    pattr = free[oparg]
            if offset in self.toChange:
                # a flagged JA whose target is WITH_CLEANUP is reported as
                # SETUP_WITH, and the target gets a COME_FROM entry for it
                if self.code[offset] == JA and self.code[oparg] == WITH_CLEANUP:
                    op_name = 'SETUP_WITH'
                    cf[oparg] = cf.get(oparg, []) + [offset]
            if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE,
                            UNPACK_SEQUENCE,
                            MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                            CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                            CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS
                            ):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if op == BUILD_TUPLE and \
                    self.code[self.prev[offset]] == LOAD_CLOSURE:
                    continue
                else:
                    op_name = '%s_%d' % (op_name, oparg)
                    if op != BUILD_SLICE:
                        customize[op_name] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    # backward absolute jump: a 'continue' or a loop jump
                    if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                     and offset not in self.not_continue:
                        op_name = 'CONTINUE'
                    else:
                        op_name = 'JUMP_BACK'

            elif op == LOAD_GLOBAL:
                if offset in self.load_asserts:
                    op_name = 'LOAD_ASSERT'
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    op_name = 'RETURN_END_IF'

            rv.append(Token(replace.get(offset, op_name), oparg, pattr, offset,
                            linestart = offset in linestartoffsets))

        if self.showasm:
            out = self.out # shortcut
            for t in rv:
                print >>out, t
            print >>out
        return rv, customize
Exemple #3
0
    def disassemble(self, co, classname=None):
        '''
        Disassemble a code object, returning a list of 'Token'.

        The main part of this procedure is modelled after
        dis.disassemble().

        The bytecode of ``co`` is copied into ``self.code``, cut after its
        last RETURN_VALUE / END_FINALLY instruction and passed through
        ``restructBytecode`` before it is turned into tokens.

        Parameters:
            co: the code object to disassemble.
            classname: optional name of the enclosing class.  When given,
                names carrying the ``_<classname>__`` prefix have the
                ``_<classname>`` part stripped.

        Returns:
            A ``(tokens, customize)`` tuple.  ``tokens`` is the list of
            Token instances; ``customize`` maps each opcode name that was
            suffixed with its argument (e.g. ``CALL_FUNCTION_2``) to that
            argument.

        Raises:
            NotImplementedError: if the bytecode contains EXTENDED_ARG.

        Side effects: sets ``self.code``, ``self.linestarts``,
        ``self.prev``, ``self.names``, ``self.toChange``, ``self.lines``
        and ``self.load_asserts``.
        '''

        rv = []
        customize = {}
        Token = self.Token  # shortcut
        self.code = array('B', co.co_code)
        # Keep everything up to and including the last RETURN_VALUE or
        # END_FINALLY; fall back to the whole code when neither occurs
        # (previously 'n' was left unbound in that case).
        n = len(self.code)
        for i in self.op_range(0, len(self.code)):
            if self.code[i] in (RETURN_VALUE, END_FINALLY):
                n = i + 1
        self.code = array('B', co.co_code[:n])
        # linestarts holds the (bytecode offset, line number) pairs
        self.linestarts = list(dis.findlinestarts(co))
        self.prev = [0]
        # class and names
        if classname:
            classname = '_' + classname.lstrip('_') + '__'

            def unmangle(name):
                # '_Cls__attr' -> '__attr'; names ending in '__' are kept.
                if name.startswith(classname) and name[-2:] != '__':
                    return name[len(classname) - 2:]
                return name

            free = [
                unmangle(name) for name in (co.co_cellvars + co.co_freevars)
            ]
            names = [unmangle(name) for name in co.co_names]
            varnames = [unmangle(name) for name in co.co_varnames]
        else:
            free = co.co_cellvars + co.co_freevars
            names = co.co_names
            varnames = co.co_varnames
        self.names = names

        # list of instruction to remove/add or change to match with bytecode 2.7
        self.toChange = []
        self.restructBytecode()
        # measured after restructBytecode() - presumably it rewrites
        # self.code in place (not visible here)
        codelen = len(self.code)
        # self.prev[k] is the offset of the instruction that covers byte
        # k - 1, i.e. for an instruction start k, the previous instruction.
        for i in self.op_range(0, codelen):
            op = self.code[i]
            self.prev.append(i)
            if self.op_hasArgument(op):
                self.prev.append(i)
                self.prev.append(i)

        # self.lines[k] = (line number of byte k, offset where the next line
        # starts, or the code length for the last line)
        j = 0
        linestarts = self.linestarts
        self.lines = []
        linetuple = namedtuple('linetuple', ['l_no', 'next'])
        linestartoffsets = {a for (a, _) in linestarts}
        prev_line_no = linestarts[0][1]
        for (start_byte, line_no) in linestarts[1:]:
            while j < start_byte:
                self.lines.append(linetuple(prev_line_no, start_byte))
                j += 1
            prev_line_no = line_no
        while j < codelen:
            self.lines.append(linetuple(prev_line_no, codelen))
            j += 1

        # Offsets of each LOAD_GLOBAL of 'AssertionError' located 3 bytes
        # after a PJIT instruction; reported as LOAD_ASSERT below.
        self.load_asserts = set()
        for i in self.op_range(0, codelen):
            if self.code[i] == PJIT and self.code[i + 3] == LOAD_GLOBAL:
                if names[self.get_argument(i + 3)] == 'AssertionError':
                    self.load_asserts.add(i + 3)

        # cf maps an offset to the list of offsets recorded as coming to it.
        # NOTE(review): self.next_stmt, self.stmts, self.not_continue and
        # self.return_end_ifs are read below but never set in this method;
        # presumably find_jump_targets populates them - confirm.
        cf = self.find_jump_targets(self.code)

        # Mark PRINT_ITEM / PRINT_NEWLINE statements that follow a
        # PRINT_ITEM statement on the same source line.
        last_stmt = self.next_stmt[0]
        i = self.next_stmt[last_stmt]
        replace = {}
        while i < codelen - 1:
            if self.lines[last_stmt].next > i:
                if self.code[last_stmt] == PRINT_ITEM:
                    if self.code[i] == PRINT_ITEM:
                        replace[i] = 'PRINT_ITEM_CONT'
                    elif self.code[i] == PRINT_NEWLINE:
                        replace[i] = 'PRINT_NEWLINE_CONT'
            last_stmt = i
            i = self.next_stmt[i]

        # Mark an IMPORT_NAME that follows another IMPORT_NAME on the same
        # source line.
        imports = self.all_instr(0, codelen,
                                 (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
        if len(imports) > 1:
            last_import = imports[0]
            for i in imports[1:]:
                if self.lines[last_import].next > i:
                    if self.code[last_import] == IMPORT_NAME == self.code[i]:
                        replace[i] = 'IMPORT_NAME_CONT'
                last_import = i

        for offset in self.op_range(0, codelen):
            op = self.code[offset]
            op_name = opname[op]
            oparg = None
            pattr = None

            if offset in cf:
                for k, source in enumerate(cf[offset]):
                    rv.append(
                        Token('COME_FROM',
                              None,
                              repr(source),
                              offset="%s_%d" % (offset, k)))
            if self.op_hasArgument(op):
                oparg = self.get_argument(offset)
                if op == EXTENDED_ARG:
                    # TODO: supporting this means carrying oparg * 65536
                    # over into the argument of the following instruction.
                    raise NotImplementedError('EXTENDED_ARG is not supported')
                if op in hasconst:
                    const = co.co_consts[oparg]
                    if isinstance(const, types.CodeType):
                        oparg = const
                        if const.co_name == '<lambda>':
                            assert op_name == 'LOAD_CONST'
                            op_name = 'LOAD_LAMBDA'
                        elif const.co_name == '<genexpr>':
                            op_name = 'LOAD_GENEXPR'
                        elif const.co_name == '<dictcomp>':
                            op_name = 'LOAD_DICTCOMP'
                        elif const.co_name == '<setcomp>':
                            op_name = 'LOAD_SETCOMP'
                        # verify uses 'pattr' for comparison, since 'attr'
                        # now holds Code(const) and thus can not be used
                        # for comparison (todo: think about changing this)
                        pattr = '<code_object ' + const.co_name + '>'
                    else:
                        pattr = const
                elif op in hasname:
                    pattr = names[oparg]
                elif op in hasjrel:
                    # relative jumps count from the next instruction
                    pattr = repr(offset + 3 + oparg)
                elif op in hasjabs:
                    pattr = repr(oparg)
                elif op in haslocal:
                    pattr = varnames[oparg]
                elif op in hascompare:
                    pattr = cmp_op[oparg]
                elif op in hasfree:
                    pattr = free[oparg]
            if offset in self.toChange:
                # a flagged JA whose target is WITH_CLEANUP is reported as
                # SETUP_WITH, and the target gets a COME_FROM entry for it
                if self.code[offset] == JA and self.code[oparg] == WITH_CLEANUP:
                    op_name = 'SETUP_WITH'
                    cf[oparg] = cf.get(oparg, []) + [offset]
            if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE, UNPACK_SEQUENCE,
                      MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
                      CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
                      CALL_FUNCTION_VAR_KW, DUP_TOPX, RAISE_VARARGS):
                # CE - Hack for >= 2.5
                #      Now all values loaded via LOAD_CLOSURE are packed into
                #      a tuple before calling MAKE_CLOSURE.
                if op == BUILD_TUPLE and \
                    self.code[self.prev[offset]] == LOAD_CLOSURE:
                    continue
                else:
                    op_name = '%s_%d' % (op_name, oparg)
                    if op != BUILD_SLICE:
                        customize[op_name] = oparg
            elif op == JA:
                target = self.get_target(offset)
                if target < offset:
                    # backward absolute jump: a 'continue' or a loop jump
                    if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
                     and offset not in self.not_continue:
                        op_name = 'CONTINUE'
                    else:
                        op_name = 'JUMP_BACK'

            elif op == LOAD_GLOBAL:
                if offset in self.load_asserts:
                    op_name = 'LOAD_ASSERT'
            elif op == RETURN_VALUE:
                if offset in self.return_end_ifs:
                    op_name = 'RETURN_END_IF'

            rv.append(
                Token(replace.get(offset, op_name),
                      oparg,
                      pattr,
                      offset,
                      linestart=offset in linestartoffsets))

        if self.showasm:
            out = self.out  # shortcut
            for t in rv:
                print >> out, t
            print >> out
        return rv, customize