Ejemplo n.º 1
0
    def to_code(self):
        """
        Build a native python code object from the fields of this instance.

        The conversion is refused unless ``self.internals`` is the very same
        object that ``get_py_internals()`` returns when called without
        arguments.

        Returns:
            types.CodeType: The native python code object.

        Raises:
            ValueError: If the internals of this code object are not those
                returned by ``get_py_internals()``.
        """

        if self.internals is not get_py_internals():
            raise ValueError('CodeObject is not compatible with the running python internals.')

        # When not running on python 2, co_kwonlyargcount is passed as an
        # extra positional argument right after co_argcount.
        if six.PY2:
            leading = (self.co_argcount,)
        else:
            leading = (self.co_argcount, self.co_kwonlyargcount)

        trailing = (
            self.co_nlocals,
            self.co_stacksize,
            self.co_flags,
            self.co_code,
            self.co_consts,
            self.co_names,
            self.co_varnames,
            self.co_filename,
            self.co_name,
            self.co_firstlineno,
            self.co_lnotab,
            self.co_freevars,
            self.co_cellvars,
        )

        return types.CodeType(*(leading + trailing))
Ejemplo n.º 2
0
    def to_code(self):
        """
        Turn this instance into a native python code object.

        This only succeeds when ``self.internals`` is the object returned by
        a no-argument call to ``get_py_internals()``.

        Returns:
            types.CodeType: The native python code object.

        Raises:
            ValueError: If this code object's internals do not match.
        """

        compatible = self.internals is get_py_internals()
        if not compatible:
            raise ValueError(
                'CodeObject is not compatible with the running python internals.'
            )

        # Collect the positional constructor arguments; co_kwonlyargcount is
        # only included when not running on python 2.
        ctor_args = [self.co_argcount]
        if not six.PY2:
            ctor_args.append(self.co_kwonlyargcount)
        ctor_args.extend([
            self.co_nlocals,
            self.co_stacksize,
            self.co_flags,
            self.co_code,
            self.co_consts,
            self.co_names,
            self.co_varnames,
            self.co_filename,
            self.co_name,
            self.co_firstlineno,
            self.co_lnotab,
            self.co_freevars,
            self.co_cellvars,
        ])

        return types.CodeType(*ctor_args)
Ejemplo n.º 3
0
 def __init__(self, co_argcount, co_kwonlyargcount, co_nlocals, co_stacksize,
              co_flags, co_code, co_consts, co_names, co_varnames,
              co_filename, co_name, co_firstlineno, co_lnotab, co_freevars,
              co_cellvars, origin=None):
     """
     Store the given code object fields on this instance.

     Every ``co_*`` argument is saved unchanged under the attribute of the
     same name. ``origin`` is handed to ``get_py_internals()`` and the
     result is kept as ``self.internals``.
     """
     fields = (
         ('co_argcount', co_argcount),
         ('co_kwonlyargcount', co_kwonlyargcount),
         ('co_nlocals', co_nlocals),
         ('co_stacksize', co_stacksize),
         ('co_flags', co_flags),
         ('co_code', co_code),
         ('co_consts', co_consts),
         ('co_names', co_names),
         ('co_varnames', co_varnames),
         ('co_filename', co_filename),
         ('co_name', co_name),
         ('co_firstlineno', co_firstlineno),
         ('co_lnotab', co_lnotab),
         ('co_freevars', co_freevars),
         ('co_cellvars', co_cellvars),
     )
     for attr_name, attr_value in fields:
         setattr(self, attr_name, attr_value)

     self.internals = get_py_internals(origin)
Ejemplo n.º 4
0
def get_protocol_version(protocol=None, target=None):
    """
    Pick a pickle protocol version that works for a given target.

    The requested (or default) protocol is capped twice: first to the
    highest protocol of the running python's pickler, then to the highest
    protocol listed for the target. A warning is emitted for each cap that
    actually lowers the value.

    Arguments:
        target: The internals description of the targeted python
            version. It is resolved through ``get_py_internals()``.
        protocol(None or int): The requested protocol version (or None for the
            default of the target python version).

    Returns:
        int: A suitable pickle protocol version.
    """

    internals = get_py_internals(target)

    chosen = protocol
    if chosen is None:
        chosen = internals['pickle_default_protocol']

    # First cap: what the running interpreter is able to produce.
    running_max = cPickle.HIGHEST_PROTOCOL
    if running_max < chosen:
        warnings.warn('Downgrading pickle protocol, running python supports up to %d.' % running_max)
        chosen = running_max

    # Second cap: what the target interpreter is listed as supporting.
    target_max = internals['pickle_highest_protocol']
    if target_max < chosen:
        warnings.warn('Downgrading pickle protocol, target python supports up to %d.' % target_max)
        chosen = target_max

    return chosen
Ejemplo n.º 5
0
def translate_opcodes(code_obj, target):
    """
    Very crude inter-python version opcode translator. Used to transcribe
    python code objects between python versions.

    The ops of ``code_obj`` are disassembled, every op whose name is missing
    from the target's opmap is rewritten into an equivalent sequence where
    one is known, and the result is assembled back into ``code_obj`` for the
    target (via ``code_obj.assemble``). Nothing is returned.

    Known rewrites:

    - ``POP_JUMP_IF_FALSE`` / ``POP_JUMP_IF_TRUE`` are expanded into
      ``JUMP_IF_TRUE`` / ``JUMP_IF_FALSE`` plus explicit ``POP_TOP`` ops.
    - ``JUMP_IF_FALSE`` / ``JUMP_IF_TRUE`` directly followed by ``POP_TOP``
      are folded into ``JUMP_IF_FALSE_OR_POP`` / ``JUMP_IF_TRUE_OR_POP``.

    Arguments:
        code_obj(pwnypack.bytecode.CodeObject): The code object representation
            to translate.
        target(dict): The py_internals structure for the target
            python version.

    Raises:
        SyntaxError: If an opcode doesn't exist in the destination opmap and
            no rewrite applies.
    """

    target = get_py_internals(target)
    src_ops = code_obj.disassemble()

    dst_opmap = target['opmap']
    dst_ops = []

    def followed_by_pop_top(index):
        # True only when a real POP_TOP op directly follows src_ops[index].
        # Guards against running off the end of the op list and against a
        # label (which is not an op) occupying the next slot, so those cases
        # end up in the SyntaxError branch instead of crashing.
        if index + 1 >= len(src_ops):
            return False
        following = src_ops[index + 1]
        if isinstance(following, pwnypack.bytecode.Label):
            return False
        return following.name == 'POP_TOP'

    op_iter = enumerate(src_ops)
    for i, op in op_iter:
        if isinstance(op, pwnypack.bytecode.Label):
            dst_ops.append(op)
            continue

        if op.name not in dst_opmap:
            if op.name == 'POP_JUMP_IF_FALSE' and 'JUMP_IF_TRUE' in dst_opmap:
                lbl = pwnypack.bytecode.Label()
                dst_ops.extend([
                    pwnypack.bytecode.Op('JUMP_IF_TRUE', lbl),
                    pwnypack.bytecode.Op('POP_TOP', None),
                    pwnypack.bytecode.Op('JUMP_ABSOLUTE', op.arg),
                    lbl,
                    pwnypack.bytecode.Op('POP_TOP', None),
                ])
            elif op.name == 'POP_JUMP_IF_TRUE' and 'JUMP_IF_FALSE' in dst_opmap:
                lbl = pwnypack.bytecode.Label()
                dst_ops.extend([
                    pwnypack.bytecode.Op('JUMP_IF_FALSE', lbl),
                    pwnypack.bytecode.Op('POP_TOP', None),
                    pwnypack.bytecode.Op('JUMP_ABSOLUTE', op.arg),
                    lbl,
                    pwnypack.bytecode.Op('POP_TOP', None),
                ])
            elif op.name == 'JUMP_IF_FALSE' and 'JUMP_IF_FALSE_OR_POP' in dst_opmap and \
                    followed_by_pop_top(i):
                # Consume the POP_TOP, it is implied by the replacement op.
                next(op_iter)
                dst_ops.append(pwnypack.bytecode.Op('JUMP_IF_FALSE_OR_POP', op.arg))
            elif op.name == 'JUMP_IF_TRUE' and 'JUMP_IF_TRUE_OR_POP' in dst_opmap and \
                    followed_by_pop_top(i):
                # Consume the POP_TOP, it is implied by the replacement op.
                next(op_iter)
                dst_ops.append(pwnypack.bytecode.Op('JUMP_IF_TRUE_OR_POP', op.arg))
            else:
                raise SyntaxError('Opcode %s not supported on target.' % op.name)
        else:
            dst_ops.append(op)

    code_obj.assemble(dst_ops, target)
Ejemplo n.º 6
0
 def __init__(self,
              co_argcount,
              co_kwonlyargcount,
              co_nlocals,
              co_stacksize,
              co_flags,
              co_code,
              co_consts,
              co_names,
              co_varnames,
              co_filename,
              co_name,
              co_firstlineno,
              co_lnotab,
              co_freevars,
              co_cellvars,
              origin=None):
     """
     Initialise the instance from individual code object fields.

     Each ``co_*`` argument is stored as-is on the attribute with the same
     name. ``self.internals`` is set to ``get_py_internals(origin)``.
     """
     self.co_argcount, self.co_kwonlyargcount = co_argcount, co_kwonlyargcount
     self.co_nlocals, self.co_stacksize = co_nlocals, co_stacksize
     self.co_flags, self.co_code = co_flags, co_code
     self.co_consts, self.co_names = co_consts, co_names
     self.co_varnames, self.co_filename = co_varnames, co_filename
     self.co_name, self.co_firstlineno = co_name, co_firstlineno
     self.co_lnotab = co_lnotab
     self.co_freevars, self.co_cellvars = co_freevars, co_cellvars

     self.internals = get_py_internals(origin)
Ejemplo n.º 7
0
    def assemble(self, ops, target=None):
        """
        Replace this code object's bytecode and stack size with the result
        of assembling ``ops``, optionally retargeting it to another python
        version.

        Arguments:
            ops(list): The opcodes (and labels) to assemble into bytecode.
            target: The opcode specification of the targeted python
                version. It is resolved with
                ``get_py_internals(target, self.internals)``; presumably
                ``None`` falls back to this object's current internals
                (TODO confirm against ``get_py_internals``).

        Returns:
            CodeObject: A reference to this :class:`CodeObject`.
        """

        internals = get_py_internals(target, self.internals)

        # The resolved internals are stored first, before any assembling.
        self.internals = internals
        self.co_code = assemble(ops, internals)
        self.co_stacksize = calculate_max_stack_depth(ops, internals)
        return self
Ejemplo n.º 8
0
    def assemble(self, ops, target=None):
        """
        Assemble operations and labels into bytecode, recompute the stack
        usage and store both on this code object. The internals of this
        object are replaced by the ones resolved for ``target``.

        Arguments:
            ops(list): The opcodes (and labels) to assemble into bytecode.
            target: The opcode specification of the targeted python
                version, resolved through
                ``get_py_internals(target, self.internals)``. NOTE(review):
                the fallback used for ``None`` depends on
                ``get_py_internals`` -- confirm there.

        Returns:
            CodeObject: A reference to this :class:`CodeObject`.
        """

        target = get_py_internals(target, self.internals)
        self.internals = target

        bytecode = assemble(ops, target)
        self.co_code = bytecode

        stack_size = calculate_max_stack_depth(ops, target)
        self.co_stacksize = stack_size

        return self
Ejemplo n.º 9
0
def disassemble(code, origin=None):
    """
    Disassemble python bytecode into a series of :class:`Op` and
    :class:`Label` instances.

    Two encodings are handled, selected by the ``wordcode`` entry of
    ``origin``: the classic one (an opcode byte, followed by a two byte
    little-endian argument when the opcode takes one) and wordcode (every
    instruction is an opcode byte plus exactly one argument byte).
    Extended-argument opcodes are folded into the argument of the op that
    follows them and do not appear in the result.

    Arguments:
        code(bytes): The bytecode (a code object's ``co_code`` property). You
            can also provide a function.
        origin(dict): The opcode specification of the python version that
            generated ``code``. If you provide ``None``, the specs for the
            currently running python version will be used.

    Returns:
        list: A list of opcodes and labels.
    """

    if inspect.isfunction(code):
        code = six.get_function_code(code).co_code

    origin = get_py_internals(origin)

    opname = origin['opname']
    hasjrel = origin['hasjrel']
    hasjabs = origin['hasjabs']
    hasjump = set(hasjrel) | set(hasjabs)
    wordcode = origin['wordcode']
    # An extended argument supplies the bits above those of a regular
    # argument: 16 bits for the classic encoding, 8 bits for wordcode.
    if not wordcode:
        ext_arg_shift = 16
    else:
        ext_arg_shift = 8

    ext_arg_name = opname[origin['extended_arg']]
    ext_arg = 0

    addr_labels = {}
    addr_ops = []

    code_iter = enumerate(six.iterbytes(code))
    for op_addr, op_code in code_iter:
        if op_code >= origin['have_argument']:
            rel_addr, arg = next(code_iter)
            if not wordcode:
                rel_addr, b = next(code_iter)
                arg += b << 8

            arg += ext_arg

            if op_code in hasjrel:
                # Relative jumps are measured from the instruction that
                # follows this one. rel_addr is the address of the last byte
                # of this instruction, so the next one starts at rel_addr + 1.
                arg += rel_addr + 1

            if op_code in hasjump:
                # Jump targets become shared Label objects keyed by address.
                arg = addr_labels.setdefault(arg, Label())
        else:
            if wordcode:
                # Skip the unused argument byte.
                next(code_iter)
            arg = None
        ext_arg = 0

        op_name = opname[op_code]

        if op_name == ext_arg_name:
            # Remember the high bits for the next op; emit nothing for the
            # extended-argument opcode itself.
            ext_arg = arg << ext_arg_shift
            op = None
        else:
            op = Op(op_name, arg)

        addr_ops.append((op_addr, op))

    # Second pass: place each label right before the op at its address.
    ops = []
    for op_addr, op in addr_ops:
        label = addr_labels.get(op_addr)
        if label is not None:
            ops.append(label)

        if op is not None:
            ops.append(op)

    return ops
Ejemplo n.º 10
0
def calculate_max_stack_depth(ops, target=None):
    """
    Compute the maximum stack depth (and thus the required stack size) of a
    series of :class:`Op` and :class:`Label` instances. Use this after
    manipulating opcodes in a way that may change the stack layout, when a
    working function has to be re-created from them.

    This is a fairly literal re-implementation of python's stackdepth and
    stackdepth_walk.

    Arguments:
        ops(list): A list of opcodes and labels (as returned by
            :func:`disassemble`).
        target: The opcode specification of the targeted python
            version. If this is ``None`` the specification of the currently
            running python version will be used.

    Returns:
        int: The calculated maximum stack depth.
    """

    blocks = blocks_from_ops(ops)
    target = get_py_internals(target)

    # Reset the walk bookkeeping on every block reachable through .next.
    current = blocks[None]
    while current:
        current.seen = False
        current.startdepth = -1
        current = current.next

    stackeffect = target['stackeffect']
    stackeffect_traits = target['stackeffect_traits']

    def walk(block=None, depth=0, max_depth=0):
        # Anything that is not a Block is treated as a key into `blocks`.
        if not isinstance(block, Block):
            block = blocks[block]

        # Stop on blocks currently being walked, and on blocks that were
        # already entered at this depth or deeper.
        if block.seen or block.startdepth >= depth:
            return max_depth

        block.seen = True
        block.startdepth = depth

        falls_through = True
        for op in block.ops:
            name = op.name

            effect = stackeffect[name]
            if callable(effect):
                effect = effect(op.arg)

            depth += effect
            max_depth = max(max_depth, depth)

            op_code = target['opmap'][name]
            if op_code in target['hasjrel'] or op_code in target['hasjabs']:
                target_depth = depth

                if stackeffect_traits & 1:
                    if name == 'FOR_ITER':
                        target_depth -= 2
                    elif name in ('SETUP_FINALLY', 'SETUP_EXCEPT'):
                        target_depth += 3
                        max_depth = max(max_depth, target_depth)
                if stackeffect_traits & 2 and name in ('JUMP_IF_TRUE_OR_POP',
                                                       'JUMP_IF_FALSE_OR_POP'):
                    depth -= 1

                # Follow the jump target with its own entry depth.
                max_depth = walk(op.arg, target_depth, max_depth)

            if name in ('JUMP_ABSOLUTE', 'JUMP_FORWARD'):
                # Unconditional jump: the next block is not entered from here.
                falls_through = False
                break

        if falls_through and block.next:
            max_depth = walk(block.next, depth, max_depth)

        block.seen = False

        return max_depth

    return walk()
Ejemplo n.º 11
0
def assemble(ops, target=None):
    """
    Assemble a set of :class:`Op` and :class:`Label` instance back into
    bytecode.

    Depending on the ``wordcode`` entry of the target's internals, either
    the classic encoding (opcode byte plus optional two byte little-endian
    argument) or the wordcode encoding (opcode byte plus one argument byte)
    is produced. Arguments that do not fit are prefixed with
    extended-argument instructions.

    Arguments:
        ops(list): A list of opcodes and labels (as returned by
            :func:`disassemble`).
        target: The opcode specification of the targeted python
            version. If this is ``None`` the specification of the currently
            running python version will be used.

    Returns:
        bytes: The assembled bytecode.

    Raises:
        ValueError: If an op is missing a required argument, has an
            argument it should not have, uses a label with a non-jump
            opcode, uses a label that is not part of ``ops``, or is a jump
            whose argument is not a label.
    """

    target = get_py_internals(target)

    opmap = target['opmap']
    hasjrel = target['hasjrel']
    hasjabs = target['hasjabs']
    hasjump = set(hasjrel) | set(hasjabs)
    have_argument = target['have_argument']
    extended_arg = target['extended_arg']
    wordcode = target['wordcode']

    # encode_op appends one instruction (including any extended-argument
    # prefix) to `output` and returns the number of bytes it appended.
    if not wordcode:

        def encode_op(output, op_code, op_arg=None):
            n = 1
            if op_arg is None:
                output.append(op_code)
            else:
                n += 2
                ext_arg = op_arg >> 16
                if ext_arg:
                    n += 3
                    output.extend([extended_arg, ext_arg & 255, ext_arg >> 8])
                    op_arg &= 65535
                output.extend([op_code, op_arg & 255, op_arg >> 8])
            return n
    else:

        def encode_op(output, op_code, op_arg=None):
            n = 2
            if op_arg is None:
                output.extend([op_code, 0])
            else:
                ext_arg = op_arg >> 8
                if ext_arg:
                    # Emit the higher bytes first, as (possibly several)
                    # extended-argument instructions into the same buffer.
                    n += encode_op(output, extended_arg, ext_arg)
                output.extend([op_code, op_arg & 255])
            return n

    # A bit of a chicken and egg problem: The address of a label depends on the instructions before it. However,
    # the instructions before a label might depend on the label itself: For very large functions, jumps may
    # require an EXTENDED_ARG opcode if the jump destination is far away. Which we only know when the label
    # has materialized, which means the address of the label will change on the next pass, which might mean
    # a different jump offset might become larger, etc... We run passes until no label changes address.

    label_address = {}
    while True:
        retry = False
        output = bytearray()
        address = 0

        for op in ops:
            if isinstance(op, Label):
                if label_address.get(op) != address:
                    retry = True
                    label_address[op] = address
                continue

            op_code = opmap[op.name]
            op_arg = op.arg

            if op_code >= have_argument and op_arg is None:
                # Sanity check.
                raise ValueError('Opcode %s requires argument.' % op)
            elif op_code < have_argument and op_arg is not None:
                # Sanity check.
                raise ValueError('Opcode %s should not have an argument.' % op)
            elif isinstance(op_arg, Label):
                if op_code not in hasjump:
                    # Sanity check.
                    raise ValueError(
                        'Did not expect label as argument for opcode %s.' % op)

                if op_arg not in ops:
                    # Sanity check.
                    raise ValueError('Label is not part of this op list.')

                # Try to turn the label argument into an address.
                op_arg = label_address.get(op_arg)
                if op_arg is None:
                    # Label hasn't materialized yet, we'll catch it on the next pass.
                    address += encode_op(output, op_code, 0)
                    continue

                if op_code in hasjrel:
                    # Offset from the start of this instruction for now; the
                    # instruction's own size is subtracted after encoding.
                    op_arg -= address
            elif op_code in hasjump:
                # Sanity check.
                raise ValueError('Expected label as argument for opcode %s.' %
                                 op)

            # Encode the opcode and the argument.
            n = encode_op(output, op_code, op_arg)
            address += n

            if op_code in hasjrel:
                # Relative jumps count from the end of the instruction, so
                # subtract its encoded size n from the bytes just written,
                # borrowing from the preceding extended-argument bytes when
                # the low part is smaller than n.
                if not wordcode:
                    op_arg = output[-2] + (output[-1] << 8)
                    if op_arg < n:
                        ext_arg = output[-5] + (output[-4] << 8) - 1
                        output[-5], output[-4] = ext_arg & 255, ext_arg >> 8
                        op_arg += 65536
                    op_arg -= n
                    output[-2], output[-1] = op_arg & 255, op_arg >> 8
                else:
                    # Argument bytes sit at every second position counted
                    # from the end: -1, -3, -5, ...
                    for i in itertools.count(1, 2):
                        if n <= output[-i]:
                            output[-i] -= n
                            break
                        output[-i] += 256 - n
                        n = 1

        if not retry:
            return bytes(output)
Ejemplo n.º 12
0
def pickle_func(func, target=None, protocol=None, b64encode=None, *args):
    """pickle_func(func, *args, target=None, protocol=None, b64encode=None)

    Encode a function in such a way that when it's unpickled, the function is
    reconstructed and called with the given arguments.

    While pickling, ``types.CodeType`` and ``types.FunctionType`` are
    temporarily replaced by stub classes and a reducer for code objects is
    registered with ``copyreg``. Both are restored before returning.

    Note:
        Compatibility between python versions is not guaranteed. Depending on
        the `target` python version, the opcodes of the provided function are
        transcribed to try to maintain compatibility. If an opcode is emitted
        which is not supported by the target python version, a SyntaxError
        will be raised.

        Constructs that are known to be problematic:

        - Python 2.6 and 2.7/3.0 use very different, incompatible opcodes for
          conditional jumps (if, while, etc). Serializing those is not
          always possible between python 2.6 and 2.7/3.0.

        - Exception handling uses different, incompatible opcodes between
          python 2 and 3.

        - Python 2 and python 3 handle nested functions very differently: the
          same opcode is used in a different way and leads to a crash. Avoid
          nesting functions if you want to pickle across python versions.

    Arguments:
        func(callable): The function to serialize and call when unpickled.
        args(tuple): The arguments to call the callable with.
        target: The internals description of the targeted python
            version. If this is ``None`` the specification of the currently
            running python version will be used.
        protocol(int): The pickle protocol version to use.
        b64encode(bool): Whether to base64 certain code object fields. Required
            when you prepare a pickle for python 3 on python 2. If it's
            ``None`` it defaults to ``False`` unless pickling from python 2 to
            python 3.

    Returns:
        bytes: The data that when unpickled calls ``func(*args)``.

    Example:
        >>> from pwny import *
        >>> import pickle
        >>> def hello(arg):
        ...     print('Hello, %s!' % arg)
        ...
        >>> p = pickle_func(hello, 'world')
        >>> del hello
        >>> pickle.loads(p)
        Hello, world!
    """

    target = get_py_internals(target)

    # Translate the function's bytecode for the target; the reducers below
    # read their fields from the translated code_obj.
    code = six.get_function_code(func)
    code_obj = pwnypack.bytecode.CodeObject.from_code(code)
    translate_opcodes(code_obj, target)

    # Reducer used when the target version is below 30: builds the argument
    # tuple without co_kwonlyargcount.
    def code_reduce_v2(_):
        if b64encode:
            # b64encode co_code and co_lnotab as they contain 8bit data.
            co_code = PickleInvoke(base64.b64decode, base64.b64encode(code_obj.co_code))
            co_lnotab = PickleInvoke(base64.b64decode, base64.b64encode(code_obj.co_lnotab))
        else:
            co_code = code_obj.co_code
            co_lnotab = code_obj.co_lnotab

        if six.PY3:
            # Encode unicode to bytes as python 2 doesn't support unicode identifiers.
            co_names = tuple(n.encode('ascii') for n in code_obj.co_names)
            co_varnames = tuple(n.encode('ascii') for n in code_obj.co_varnames)
            co_filename = code_obj.co_filename.encode('ascii')
            co_name = code_obj.co_name.encode('ascii')
        else:
            co_names = code_obj.co_names
            co_varnames = code_obj.co_varnames
            co_filename = code_obj.co_filename
            co_name = code_obj.co_name

        return types.CodeType, (code_obj.co_argcount, code_obj.co_nlocals, code_obj.co_stacksize, code_obj.co_flags,
                                co_code, code_obj.co_consts, co_names, co_varnames, co_filename, co_name,
                                code_obj.co_firstlineno, co_lnotab)

    # Reducer used for every other target: same as above plus
    # co_kwonlyargcount, and without the ascii encoding of names.
    def code_reduce_v3(_):
        if b64encode:
            # b64encode co_code and co_lnotab as they contain 8bit data.
            co_code = PickleInvoke(base64.b64decode, base64.b64encode(code_obj.co_code))
            co_lnotab = PickleInvoke(base64.b64decode, base64.b64encode(code_obj.co_lnotab))
        else:
            co_code = code_obj.co_code
            co_lnotab = code_obj.co_lnotab

        return types.CodeType, (code_obj.co_argcount, code_obj.co_kwonlyargcount, code_obj.co_nlocals,
                                code_obj.co_stacksize, code_obj.co_flags, co_code, code_obj.co_consts,
                                code_obj.co_names, code_obj.co_varnames, code_obj.co_filename, code_obj.co_name,
                                code_obj.co_firstlineno, co_lnotab)

    # Stubs to trick cPickle into pickling calls to CodeType/FunctionType.
    # They are given the module and qualified name of the real types.
    class CodeType(object):  # pragma: no cover
        pass
    CodeType.__module__ = 'types'
    CodeType.__qualname__ = 'CodeType'

    class FunctionType(object):  # pragma: no cover
        pass
    FunctionType.__module__ = 'types'
    FunctionType.__qualname__ = 'FunctionType'

    protocol = get_protocol_version(protocol, target)

    # Remember (and remove) any reducer already registered for code objects
    # so it can be put back in the finally block below.
    old_code_reduce = copyreg.dispatch_table.pop(types.CodeType, None)
    if target['version'] < 30:
        copyreg.pickle(types.CodeType, code_reduce_v2)
    else:
        if six.PY2:
            # Pickling from python 2 for a python 3 target forces b64encode
            # on; warn only when the caller explicitly asked for False.
            if b64encode is False:
                warnings.warn('Enabling b64encode, pickling from python 2 to 3.')
            b64encode = True
        copyreg.pickle(types.CodeType, code_reduce_v3)

    # This has an astonishing level of evil just to convince pickle to pickle CodeType and FunctionType:
    old_code_type, types.CodeType = types.CodeType, CodeType
    old_function_type, types.FunctionType = types.FunctionType, FunctionType

    try:
        # types.FunctionType and types.CodeType refer to the stubs here.
        build_func = PickleInvoke(types.FunctionType, code, PickleInvoke(globals))
        return cPickle.dumps(PickleInvoke(build_func, *args), protocol)
    finally:
        # Restore the real types first: the dispatch table entries below are
        # keyed on the real types.CodeType.
        types.CodeType = old_code_type
        types.FunctionType = old_function_type

        if old_code_reduce is not None:
            copyreg.pickle(types.CodeType, old_code_reduce)
        else:
            del copyreg.dispatch_table[types.CodeType]
Ejemplo n.º 13
0
def disassemble(code, origin=None):
    """
    Turn python bytecode into a list of :class:`Op` and :class:`Label`
    instances.

    Every opcode at or above ``origin['have_argument']`` is read together
    with a two byte little-endian argument. Extended-argument opcodes are
    merged into the argument of the op that follows them and are not part
    of the result. Jump arguments are replaced by :class:`Label` instances,
    which are inserted right before the op at the address they point to.

    Arguments:
        code(bytes): The bytecode (a code object's ``co_code`` property). You
            can also provide a function.
        origin(dict): The opcode specification of the python version that
            generated ``code``. If you provide ``None``, the specs for the
            currently running python version will be used.

    Returns:
        list: A list of opcodes and labels.
    """

    if inspect.isfunction(code):
        code = six.get_function_code(code).co_code

    origin = get_py_internals(origin)

    opname = origin['opname']
    rel_jumps = origin['hasjrel']
    abs_jumps = origin['hasjabs']
    all_jumps = set(rel_jumps) | set(abs_jumps)

    extended_name = opname[origin['extended_arg']]
    pending_ext = 0

    labels = {}
    decoded = []

    stream = enumerate(six.iterbytes(code))
    for addr, opcode in stream:
        arg = None
        if opcode >= origin['have_argument']:
            low = next(stream)[1]
            high = next(stream)[1]
            arg = low + (high << 8) + pending_ext

            if opcode in rel_jumps:
                # Relative to the instruction after this three byte one.
                arg += addr + 3

            if opcode in all_jumps:
                arg = labels.setdefault(arg, Label())
        pending_ext = 0

        name = opname[opcode]

        if name == extended_name:
            # Keep the upper 16 bits for the next op and emit no op here.
            pending_ext = arg << 16
            decoded.append((addr, None))
        else:
            decoded.append((addr, Op(name, arg)))

    # Interleave the labels with the ops, by address.
    ops = []
    for addr, op in decoded:
        if addr in labels:
            ops.append(labels[addr])

        if op is not None:
            ops.append(op)

    return ops
Ejemplo n.º 14
0
def calculate_max_stack_depth(ops, target=None):
    """
    Determine the maximum stack depth (the required stack size) for a series
    of :class:`Op` and :class:`Label` instances. This is needed when opcodes
    were manipulated in a way that may change the stack layout and a working
    function is to be re-created from them.

    This is a fairly literal re-implementation of python's stackdepth and
    stackdepth_walk.

    Arguments:
        ops(list): A list of opcodes and labels (as returned by
            :func:`disassemble`).
        target: The opcode specification of the targeted python
            version. If this is ``None`` the specification of the currently
            running python version will be used.

    Returns:
        int: The calculated maximum stack depth.
    """

    blocks = blocks_from_ops(ops)
    target = get_py_internals(target)

    # Clear the per-block walk state along the .next chain.
    node = blocks[None]
    while node:
        node.seen = False
        node.startdepth = -1
        node = node.next

    stackeffect = target['stackeffect']
    stackeffect_traits = target['stackeffect_traits']

    def is_jump(name):
        # Whether the named op is listed as a relative or absolute jump.
        op_code = target['opmap'][name]
        return op_code in target['hasjrel'] or op_code in target['hasjabs']

    def walk(block=None, depth=0, max_depth=0):
        if not isinstance(block, Block):
            # Not a Block: use it as a key into the block mapping.
            block = blocks[block]

        if block.seen or block.startdepth >= depth:
            return max_depth

        block.seen = True
        block.startdepth = depth

        for op in block.ops:
            delta = stackeffect[op.name]
            if callable(delta):
                delta = delta(op.arg)

            depth += delta
            if max_depth < depth:
                max_depth = depth

            if is_jump(op.name):
                branch_depth = depth

                if stackeffect_traits & 1:
                    if op.name == 'FOR_ITER':
                        branch_depth -= 2
                    elif op.name in ('SETUP_FINALLY', 'SETUP_EXCEPT'):
                        branch_depth += 3
                        if max_depth < branch_depth:
                            max_depth = branch_depth
                if stackeffect_traits & 2:
                    if op.name in ('JUMP_IF_TRUE_OR_POP', 'JUMP_IF_FALSE_OR_POP'):
                        depth -= 1

                # Walk the jump target with the depth it is entered at.
                max_depth = walk(op.arg, branch_depth, max_depth)

            if op.name in ('JUMP_ABSOLUTE', 'JUMP_FORWARD'):
                # Unconditional jump: skip the fall-through walk below.
                break
        else:
            if block.next:
                max_depth = walk(block.next, depth, max_depth)

        block.seen = False

        return max_depth

    return walk()
Ejemplo n.º 15
0
def assemble(ops, target=None):
    """
    Assemble a set of :class:`Op` and :class:`Label` instance back into
    bytecode.

    Ops without an argument are encoded as a single byte; ops with an
    argument as the opcode followed by a two byte little-endian argument.
    Arguments of 65536 and above are prefixed with an extended-argument
    instruction carrying the upper 16 bits.

    Arguments:
        ops(list): A list of opcodes and labels (as returned by
            :func:`disassemble`).
        target: The opcode specification of the targeted python
            version. If this is ``None`` the specification of the currently
            running python version will be used.

    Returns:
        bytes: The assembled bytecode.

    Raises:
        ValueError: If an op is missing a required argument, has an
            argument it should not have, uses a label with a non-jump
            opcode, uses a label that is not part of ``ops``, or is a jump
            whose argument is not a label.
    """

    def encode_op(op_code, op_arg=None):
        if op_arg is None:
            return six.int2byte(op_code)
        else:
            return six.int2byte(op_code) + six.int2byte(op_arg & 255) + six.int2byte(op_arg >> 8)

    target = get_py_internals(target)

    opmap = target['opmap']
    hasjrel = target['hasjrel']
    hasjabs = target['hasjabs']
    hasjump = set(hasjrel) | set(hasjabs)
    have_argument = target['have_argument']
    extended_arg = target['extended_arg']

    # A bit of a chicken and egg problem: The address of a label depends on the instructions before it. However,
    # the instructions before a label might depend on the label itself: For very large functions, jumps may
    # require an EXTENDED_ARG opcode if the jump destination is far away. Which we only know when the label
    # has materialized, which means the address of the label will change on the next pass, which might mean
    # a different jump offset might become larger, etc... We run passes until no label changes address.

    # Encoded instructions are collected as chunks and joined once at the
    # end, instead of repeatedly concatenating bytes objects.
    chunks = []
    label_address = {}
    retry = True
    while retry:
        retry = False
        chunks = []
        address = 0
        for op in ops:
            if isinstance(op, Label):
                if label_address.get(op) != address:
                    retry = True
                    label_address[op] = address
                continue

            op_code = opmap[op.name]
            op_arg = op.arg

            if op_arg is None:
                if op_code >= have_argument:
                    # Sanity check.
                    raise ValueError('Opcode %s requires argument.' % op)

                # Encode a single-byte opcode.
                chunks.append(encode_op(op_code))
                address += 1
                continue

            if op_code < have_argument:
                # Sanity check.
                raise ValueError('Opcode %s should not have an argument.' % op)

            if isinstance(op_arg, Label):
                if op_code not in hasjump:
                    # Sanity check.
                    raise ValueError('Did not expect label as argument for opcode %s.' % op)

                if op_arg not in ops:
                    # Sanity check.
                    raise ValueError('Label is not part of this op list.')

                # Try to turn the label argument into an address.
                op_arg = label_address.get(op_arg)
                if op_arg is None:
                    # Label hasn't materialized yet, we'll catch it on the next pass.
                    # Nothing is emitted for this op; the pass is guaranteed
                    # to be retried once the label is reached.
                    if op_code in hasjabs and address > 65535:
                        # Educated guess that we'll need an extended arg. Might save us a pass.
                        address += 6
                    else:
                        address += 3
                    continue

                if op_code in hasjrel:
                    # Fixup address for relative jump.
                    op_arg -= address + 3
            elif op_code in hasjump:
                # Sanity check.
                raise ValueError('Expected label as argument for opcode %s.' % op)

            if op_arg >= 65536:
                if op_code in hasjrel:
                    # The extended-argument prefix makes this instruction
                    # three bytes longer and relative offsets count from the
                    # end of the instruction, so compensate for the prefix.
                    op_arg -= 3

                # Encode the extended argument (upper 16 bit of the argument).
                chunks.append(encode_op(extended_arg, op_arg >> 16))
                address += 3
                # Adjust the argument to only contain the lower 16 bits.
                op_arg &= 65535

            # Encode the opcode and the argument.
            chunks.append(encode_op(op_code, op_arg))
            address += 3

    return b''.join(chunks)
Ejemplo n.º 16
0
def marshal_load(fp, origin=None):
    """
    Unserialize data serialized with :func:`marshal.dump`. This function
    works across python versions. Marshalled code objects are returned as
    instances of :class:`~pwnypack.bytecode.CodeObject`.

    Arguments:
        fp(file): A file or file-like object that contains the serialized
            data.
        origin(dict): The opcode specification of the python version that
            generated the data. If you provide ``None``, the specs for the
            currently running python version will be used.

    Returns:
        The unserialized data.

    Raises:
        ValueError: If an object type is encountered that this function
            does not know how to unserialize.
    """

    origin = get_py_internals(origin)
    version = origin['version']

    # Objects that were flagged as referenceable, in the order in which they
    # were encountered. Reference objects index into this list.
    refs = []

    def ref(o, flags):
        # Register the object for later back-references when it is flagged.
        if flags & FLAG_REF:
            refs.append(o)
        return o

    def to_signed(value, bits):
        # Reinterpret an unsigned two's-complement value as a signed one.
        # Only values at or above the sign bit are touched, so this is a
        # no-op if the decoder already returned a signed value.
        sign_bit = 1 << (bits - 1)
        if value >= sign_bit:
            value -= sign_bit << 1
        return value

    def read_byte():
        return six.byte2int(fp.read(1))

    def read_short():
        return u16(fp.read(2), target=MARSHAL_TARGET)

    def read_long():
        # Used for sizes, indices and code object fields.
        return u32(fp.read(4), target=MARSHAL_TARGET)

    def read_signed_long():
        # Integer values and long digit counts are signed in the marshal
        # format. NOTE(review): assumes u32 decodes unsigned values; verify.
        return to_signed(read_long(), 32)

    def read_int64():
        return u64(fp.read(8), target=MARSHAL_TARGET)

    def read_signed_int64():
        # NOTE(review): assumes u64 decodes unsigned values; verify.
        return to_signed(read_int64(), 64)

    def read_float_binary():
        return unpack('d', fp.read(8), target=MARSHAL_TARGET)[0]

    def read_bytes():
        # Byte string prefixed with a 32 bit length.
        return fp.read(read_long())

    def read_bytes_short():
        # Byte string prefixed with a single byte length.
        return fp.read(read_byte())

    def read_float_text():
        # Float stored as text with a single byte length prefix.
        return float(read_bytes_short())

    def read_object():
        # The type byte carries the object type and, optionally, FLAG_REF.
        c = read_byte()
        flags = c & FLAG_REF
        c = ObjectType(c & ~FLAG_REF)

        if c is ObjectType.null:
            return NULL
        elif c is ObjectType.none:
            return None
        elif c is ObjectType.stopiter:
            return StopIteration
        elif c is ObjectType.ellipsis:
            return Ellipsis
        elif c is ObjectType.false:
            return False
        elif c is ObjectType.true:
            return True
        elif c is ObjectType.int:
            return ref(read_signed_long(), flags)
        elif c is ObjectType.int64:
            return ref(read_signed_int64(), flags)
        elif c is ObjectType.long:
            # The sign of the digit count is the sign of the number, its
            # magnitude is the number of 16 bit digits that follow. Every
            # digit contributes PyLong_MARSHAL_SHIFT bits.
            n = read_signed_long()
            r = sum(read_short() << (i * PyLong_MARSHAL_SHIFT)
                    for i in range(abs(n)))
            return ref(-r if n < 0 else r, flags)
        elif c is ObjectType.float:
            return ref(read_float_text(), flags)
        elif c is ObjectType.binary_float:
            return ref(read_float_binary(), flags)
        elif c is ObjectType.complex:
            real = read_float_text()
            imag = read_float_text()
            return ref(complex(real, imag), flags)
        elif c is ObjectType.binary_complex:
            real = read_float_binary()
            imag = read_float_binary()
            return ref(complex(real, imag), flags)
        elif c is ObjectType.string:
            return ref(read_bytes(), flags)
        elif c is ObjectType.unicode:
            return ref(read_bytes().decode('utf-8'), flags)
        elif c is ObjectType.interned:
            if version < 30:
                # For origins before version 30 an interned string is
                # always registered, regardless of the flag in the type byte.
                return ref(read_bytes(), FLAG_REF)
            else:
                return ref(read_bytes().decode('utf-8'), flags)
        elif c is ObjectType.ascii:
            return ref(read_bytes().decode('ascii'), flags)
        elif c is ObjectType.ascii_interned:
            return ref(read_bytes().decode('ascii'), flags)
        elif c is ObjectType.short_ascii:
            return ref(read_bytes_short().decode('ascii'), flags)
        elif c is ObjectType.short_ascii_interned:
            return ref(read_bytes_short().decode('ascii'), flags)
        elif c in (ObjectType.tuple, ObjectType.small_tuple,
                   ObjectType.frozenset):
            # Immutable containers can only be built once all members are
            # read. Reserve the reference slot first so that members that
            # register themselves end up behind it, then fill it in.
            ref_index = len(refs)
            ref(NULL, flags)
            r_type = frozenset if c is ObjectType.frozenset else tuple
            n = read_byte() if c is ObjectType.small_tuple else read_long()
            r = r_type(read_object() for _ in range(n))
            if flags & FLAG_REF:
                refs[ref_index] = r
            return r
        elif c is ObjectType.list:
            r = ref([], flags)
            for _ in range(read_long()):
                r.append(read_object())
            return r
        elif c is ObjectType.set:
            r = ref(set(), flags)
            for _ in range(read_long()):
                r.add(read_object())
            return r
        elif c is ObjectType.dict:
            # Key/value pairs follow until a NULL key is encountered.
            r = ref({}, flags)
            while True:
                k = read_object()
                if k is NULL:
                    break
                r[k] = read_object()
            return r
        elif c in (ObjectType.stringref, ObjectType.ref):
            # Back-reference to a previously registered object.
            return refs[read_long()]
        elif c is ObjectType.code:
            # Reserve the reference slot before reading the nested objects,
            # the code object itself is filled in once it is constructed.
            ref_index = len(refs)
            ref(NULL, flags)

            co_argcount = read_long()
            if version < 30:
                # The field is not read for these origins, default to 0.
                co_kwonlyargcount = 0
            else:
                co_kwonlyargcount = read_long()
            co_nlocals = read_long()
            co_stacksize = read_long()
            co_flags = read_long()
            co_code = read_object()
            co_consts = read_object()
            co_names = read_object()
            co_varnames = read_object()
            co_freevars = read_object()
            co_cellvars = read_object()
            co_filename = read_object()
            co_name = read_object()
            co_firstlineno = read_long()
            co_lnotab = read_object()

            # Note that the constructor argument order differs from the
            # order in which the fields are read from the stream.
            r = CodeObject(
                co_argcount,
                co_kwonlyargcount,
                co_nlocals,
                co_stacksize,
                co_flags,
                co_code,
                co_consts,
                co_names,
                co_varnames,
                co_filename,
                co_name,
                co_firstlineno,
                co_lnotab,
                co_freevars,
                co_cellvars,
                origin,
            )
            if flags & FLAG_REF:
                refs[ref_index] = r
            return r
        else:
            raise ValueError('Unexpected object type %s.' % c)

    return read_object()