Exemple #1
0
def test_value_pool_no_default():
    """Values added to a pool built without a default get indices 0, 1, ..."""
    first = object()
    second = object()
    pool = ValuePool(None)

    # Each new value receives the next index, and that index maps back
    # to the value that was stored.
    for expected_index, value in enumerate((first, second)):
        assert pool.index_for(value) == expected_index
        assert pool.value_at(expected_index) == value
Exemple #2
0
def test_value_pool_no_default():
    """Check index assignment and lookup on a pool created without a default."""
    values = (object(), object())
    pool = ValuePool(None)

    # Walk the values in insertion order: the n-th distinct value must be
    # assigned index n and must be retrievable through that index.
    for position, item in enumerate(values):
        assert pool.index_for(item) == position
        assert pool.value_at(position) == item
Exemple #3
0
def test_value_pool_default():
    """The default value sits at index 0 and new values start at index 1."""
    default_value = object()
    pool = ValuePool(None, default=default_value)

    # The default is available at index 0 before anything else is added.
    assert pool.value_at(0) == default_value
    assert pool.index_for(default_value) == 0

    # Asking for the same new value twice must give the same index.
    for _ in range(2):
        assert pool.index_for(1) == 1

    # Adding a value must not disturb the default slot.
    assert pool.value_at(0) == default_value
    assert pool.value_at(1) == 1
Exemple #4
0
def test_value_pool_default():
    """Check that a pool's default occupies slot 0 and others follow it."""
    sentinel = object()
    pool = ValuePool(None, default=sentinel)

    # Slot 0 already holds the default, in both lookup directions.
    assert pool.value_at(0) == sentinel
    assert pool.index_for(sentinel) == 0

    # The first non-default value lands in slot 1; a repeated lookup of
    # the same value must return that same slot.
    new_value, new_slot = 1, 1
    for _ in range(2):
        assert pool.index_for(new_value) == new_slot

    # Both slots still resolve to what was stored in them.
    assert pool.value_at(0) == sentinel
    assert pool.value_at(new_slot) == new_value
Exemple #5
0
class CodeAssembler(object):
    """
    Holds an ordered list of instructions plus the state used while
    assembling them: current and maximum stack/scope depth, the pool of
    named local registers, the list of jump-like instructions and the
    name -> Label mapping.
    """
    implements(IConstantPoolWriter)

    def __init__(self, local_names):
        """
        Create an assembler whose initial locals are ``local_names``.

        Every name is registered in the locals pool, in order, so the
        initial maximum local count equals ``len(local_names)``.
        """
        self.local_names = local_names
        self.locals = ValuePool(None)
        for i in local_names:
            self.locals.index_for(i)

        self.instructions = []

        self._stack_depth = 0
        self._scope_depth = 0

        self.max_local_count = len(local_names)
        self.max_stack_depth = 0
        self.max_scope_depth = 0

        # jump-like instructions
        self.jumps = []

        # name -> Label
        self.labels = {}

        self.flags = 0

    def make_label(self, name):
        """
        Return the label registered under ``name``, creating it if needed.

        A newly created label records the current stack and scope depth;
        if a label with this name already exists it is returned unchanged.
        """
        label = Label(name)
        label.stack_depth, label.scope_depth = self.stack_depth, self.scope_depth
        return self.labels.setdefault(name, label)

    def emit(self, name, *a, **kw):
        """
        Emit an instruction, with given arguments.

        The instruction class is looked up by ``name``, instantiated with
        the remaining arguments, added to this assembler and returned.
        """
        return self.add_instruction(get_instruction(name)(*a, **kw))

    def add_instruction(self, instruction):
        """
        Add an instruction to this block.

        The instruction is appended and then notified through its
        ``assembler_added`` hook. Returns the instruction.
        """
        self.instructions.append(instruction)
        instruction.assembler_added(self)
        return instruction

    def add_instructions(self, instructions):
        """
        Iterate over the given argument and add these instructions,
        one by one, to this assembler.
        """
        for i in instructions:
            self.add_instruction(i)

    def optimize(self):
        """
        Do some not so simple optimizations.

        Two rounds of a mark pass followed by a peephole rewrite pass are
        run over ``self.instructions``; afterwards local registers are
        renumbered and the resulting register count is stored in
        ``self.numlocals``. ``self.instructions`` is replaced by the
        optimized list.
        """
        if not self.instructions:
            # The passes below seed a sliding window with the first
            # instruction, so there is nothing to do for an empty body.
            self.numlocals = len(self.local_names)
            return

        for _ in range(2):
            jumps = {}

            # instructions to remove
            remv_inst = set()

            # register -> set([prev, curr])
            remv_regs = {}

            institer = iter(self.instructions)
            instructions = [next(institer)]

            # First pass - mark anything needed for the second pass.
            for newinst in institer:
                instructions.append(newinst)
                prev, curr = instructions[-2:]

                # Gather all jumps by their respective labels.
                if curr.jumplike:
                    jumps.setdefault(curr.labelname, []).append(curr)

                S = set((prev, curr))

                # Detect if there are any references to this register other than
                # the setlocal/getlocal sequence here.
                if prev.name in SET_LOCALS and \
                   curr.name in GET_LOCALS and \
                   prev.argument == curr.argument:
                    remv_inst.update(S)
                    remv_regs[curr.argument] = S

                # PyPy-specific optimization: some opcodes have an unnecessary
                # StoreResult at the end, so callpropvoid and some setproperty's
                # have a pushnull and an unused register afterwards. Stop that.
                elif prev.name in ("pushnull", "pushundefined") and \
                     curr.name in SET_LOCALS:
                    remv_inst.update(S)
                    remv_regs[curr.argument] = S

                elif curr.name in GET_LOCALS:
                    # The register is read somewhere else, so the pair that
                    # was marked for it has to stay.
                    if curr.argument in remv_regs:
                        remv_inst -= remv_regs[curr.argument]

                elif curr.name in SET_LOCALS:
                    # Another store to this register: forget the candidate
                    # pair recorded for it (presumably because a new value
                    # lifetime starts here).
                    remv_regs.pop(curr.argument, None)

                elif curr.name == "kill":
                    # If we're going to remove this register, mark the kill for deletion too.
                    if curr.argument in remv_regs:
                        remv_inst.add(curr)

            institer = iter(self.instructions)
            instructions = [next(institer)]
            # Second pass
            for newinst in institer:
                instructions.append(newinst)
                keep_going = True
                while keep_going:
                    if len(instructions) < 2:
                        # Earlier removals shrank the window below a pair;
                        # wait for the next instruction before matching.
                        break

                    prev, curr = instructions[-2:]
                    test = prev.name, curr.name

                    # Prevent errors with jumps and lone labels.
                    if curr.name == "label" and curr.labelname not in jumps:
                        instructions = instructions[:-1]

                    # Branch optimizations geared for PyPy.
                    elif test in BRANCH_OPTIMIZE:
                        instructions = instructions[:-2]
                        new = get_instruction(BRANCH_OPTIMIZE[test])(
                            curr.labelname)
                        jumps[curr.labelname].remove(curr)
                        jumps[curr.labelname].append(new)
                        instructions.append(new)

                    # Two opcodes in a row that do nothing should be removed.
                    elif prev.name in GET_LOCALS and curr.name in SET_LOCALS \
                             and prev.argument == curr.argument:
                        instructions = instructions[:-2]

                    # jump then label -> remove the jump.
                    elif test == ("jump", "label"
                                  ) and prev.labelname == curr.labelname:
                        # Don't remove the label, we may need it for a backref later on.
                        instructions.pop(-2)
                        jumps[prev.labelname].remove(prev)

                    # return after return -> remove the second return.
                    elif prev.name in ("returnvalue", "returnvoid") and \
                         curr.name in ("returnvalue", "returnvoid"):
                        instructions.pop()

                    # label then jump -> remove both and rename.
                    elif test == ("label", "jump"):
                        for jump in jumps[prev.labelname]:
                            jump.labelname = curr.labelname
                        jumps[curr.labelname].remove(curr)
                        jumps[curr.labelname].extend(jumps[prev.labelname])
                        del jumps[prev.labelname]
                        instructions = instructions[:-2]

                    elif curr in remv_inst:
                        instructions.pop()

                    else:
                        keep_going = False

            self.instructions = instructions

        # Third pass - pack in those registers.
        # local_names is the arguments to the method
        used_registers = dict((i, i) for i in range(len(self.local_names)))

        # Every instruction is visited exactly once and the rewritten
        # instruction goes into a fresh list, so the result of the earlier
        # passes is never overwritten in place.
        packed = []
        for curr in self.instructions:
            if curr.name in SET_LOCALS:
                # First store to a register allocates the next free slot.
                index = used_registers.setdefault(curr.argument,
                                                  len(used_registers))
                curr = get_instruction('setlocal')(index)

            elif curr.name in GET_LOCALS:
                # Registers that were never remapped keep their number.
                curr = get_instruction('getlocal')(used_registers.get(
                    curr.argument, curr.argument))

            elif curr.name == "kill":
                curr = get_instruction('kill')(used_registers.get(
                    curr.argument, curr.argument))

            packed.append(curr)

        self.numlocals = len(used_registers)
        self.instructions = packed

    # ========================================
    # STACK DEPTH and SCOPE DEPTH tracking

    def get_stack_depth(self):
        """Return the current stack depth."""
        return self._stack_depth

    def set_stack_depth(self, value):
        """Set the current stack depth, raising the recorded maximum if exceeded."""
        self._stack_depth = value
        if value > self.max_stack_depth:
            self.max_stack_depth = value

    stack_depth = property(get_stack_depth, set_stack_depth)

    def get_scope_depth(self):
        """Return the current scope depth."""
        return self._scope_depth

    def set_scope_depth(self, value):
        """Set the current scope depth, raising the recorded maximum if exceeded."""
        self._scope_depth = value
        if value > self.max_scope_depth:
            self.max_scope_depth = value

    scope_depth = property(get_scope_depth, set_scope_depth)

    # ========================================
    # LOCAL tracking

    @property
    def next_free_local(self):
        """
        Return the index of the next empty local.
        """
        return self.locals.next_free()

    def set_local(self, name):
        """
        Mark the register named "name" as set and return
        the index. The maximum local count is raised if the pool grew
        past it.
        """
        index = self.locals.index_for(name)
        if self.local_count > self.max_local_count:
            self.max_local_count = self.local_count
        return index

    def get_local(self, name):
        """
        Return the index for the local named "name".
        """
        return self.locals.get_index(name)

    def kill_local(self, name):
        """
        Mark the register named "name" as free and return
        the index.
        """
        index = self.locals.kill(name)
        if self.local_count > self.max_local_count:
            self.max_local_count = self.local_count
        return index

    def has_local(self, name):
        """
        Returns True if we have a register named "name" in the current
        assembler context.
        """
        return name in self.locals

    @property
    def local_count(self):
        """
        The current local count.
        """
        return len(self.locals)

    def dump_instructions(self, exceptions=(), use_label_names=False):
        """
        Dump this assembler's instructions to a string, one instruction
        per line, each prefixed with its offset.

        "exceptions" is an iterable of objects with "from_", "to_",
        "exc_type" and "target" attributes; a marker line is emitted
        before the instruction at each "from_" and "to_" offset.

        If "use_label_names" is True, then the label names will be used
        when dumping, otherwise label names will be generated. Label
        names are not kept in compiled code, so it makes sense to set
        this to False when dumping parsed code.

        Note that this runs assembler pass 1 on every instruction and
        rewrites the "labelname" of jump-like instructions to the name
        that was printed.
        """

        # label name => remapped label name
        lblmap = {}

        # address => [exception, ...]
        exc_from = {}
        exc_to = {}
        for exc in exceptions:
            exc_from.setdefault(exc.from_, []).append(exc)
            exc_to.setdefault(exc.to_, []).append(exc)

        for inst in self.instructions:
            inst.assembler_pass1(self)
            if inst.label and not use_label_names:
                # A label is usually seen several times (its definition
                # plus every jump to it). Number it only on first sight so
                # that two different labels can never share a generated name.
                name = inst.label.name
                if name not in lblmap:
                    lblmap[name] = "L%d" % (len(lblmap) + 1)

        dump, offset = [], 0
        for inst in self.instructions:

            if inst.label:
                lblname = inst.label.name
                lblname = lblmap.get(lblname, lblname)
                if inst.jumplike:
                    # we're jumping to the label -- get us a mapped label
                    inst.labelname = lblname
                else:
                    # we're defining a label
                    dump.append("\n%s:" % (lblname, ))

            for exc in exc_from.get(offset, []):
                dump.append("<%s %d" % (exc.exc_type, exc.target))
            for exc in exc_to.get(offset, []):
                dump.append(">%s %d" % (exc.exc_type, exc.target))

            dump.append("%d\t%s" % (offset, inst))
            offset += len(inst)

        return '\n'.join(dump)

    def pass1(self):
        """
        Do assembler pass 1.
        """
        # Pass 1.
        for inst in self.instructions:
            inst.assembler_pass1(self)

    def write_constants(self, pool):
        """Hand every instruction to ``pool.write``, in order."""
        for inst in self.instructions:
            pool.write(inst)

    def serialize(self):
        """
        Serialize this code to a string, and also
        resolve any jump offsets.
        """
        code = StringIO()
        # Pass 2. Generate code.
        for inst in self.instructions:
            inst.assembler_pass2(self, code.tell())
            code.write(inst.serialize())

        # Patch up jumps.
        for inst in self.jumps:
            assert inst in self.instructions
            # The offset is written one byte past the instruction's address
            # and is relative to address + 4 (presumably a 1-byte opcode
            # followed by a 3-byte offset -- confirm against the
            # instruction encoding).
            code.seek(inst.address + 1)
            code.write(inst.label.relative_offset(inst.address + 4))
        return code.getvalue()

    @classmethod
    def parse(cls, bitstream, abc, constants, local_count):
        """
        Build an assembler by parsing instructions from ``bitstream``.

        Locals are named ``_loc0`` .. ``_loc<local_count - 1>``. A U32
        code length is read first and instructions are parsed until the
        stream position reaches the end of that region.
        """
        asm = cls(["_loc%d" % (i, ) for i in range(local_count)])
        codelen = bitstream.read(U32)
        # codelen is multiplied by 8 because tell() presumably reports a
        # position in bits -- confirm against the bitstream class.
        finish = bitstream.tell() + codelen * 8
        while bitstream.tell() < finish:
            asm.add_instruction(
                parse_instruction(bitstream, abc, constants, asm))
        return asm
Exemple #6
0
class CodeAssembler(object):
    """
    Holds an ordered list of instructions plus the state used while
    assembling them: current and maximum stack/scope depth, the pool of
    named local registers, the list of jump-like instructions and the
    name -> Label mapping.
    """
    implements(IConstantPoolWriter)

    def __init__(self, local_names):
        """
        Create an assembler whose initial locals are ``local_names``.

        Every name is registered in the locals pool, in order, so the
        initial maximum local count equals ``len(local_names)``.
        """
        self.local_names = local_names
        self.locals = ValuePool(None)
        for i in local_names:
            self.locals.index_for(i)

        self.instructions = []

        self._stack_depth = 0
        self._scope_depth = 0

        self.max_local_count = len(local_names)
        self.max_stack_depth = 0
        self.max_scope_depth = 0

        # jump-like instructions
        self.jumps = []

        # name -> Label
        self.labels = {}

        self.flags = 0

    def make_label(self, name):
        """
        Return the label registered under ``name``, creating it if needed.

        A newly created label records the current stack and scope depth;
        if a label with this name already exists it is returned unchanged.
        """
        label = Label(name)
        label.stack_depth, label.scope_depth = self.stack_depth, self.scope_depth
        return self.labels.setdefault(name, label)

    def emit(self, name, *a, **kw):
        """
        Emit an instruction, with given arguments.

        The instruction class is looked up by ``name``, instantiated with
        the remaining arguments, added to this assembler and returned.
        A trace line with the name and positional arguments is printed
        to standard output first.
        """
        # Debug trace. The single-argument parenthesized form prints the
        # same text under Python 2 and Python 3.
        print("    %s %s" % (name, a))
        return self.add_instruction(get_instruction(name)(*a, **kw))

    def add_instruction(self, instruction):
        """
        Add an instruction to this block.

        The instruction is appended and then notified through its
        ``assembler_added`` hook. Returns the instruction.
        """
        self.instructions.append(instruction)
        instruction.assembler_added(self)
        return instruction

    def add_instructions(self, instructions):
        """
        Iterate over the given argument and add these instructions,
        one by one, to this assembler.
        """
        for i in instructions:
            self.add_instruction(i)

    def optimize(self):
        """
        Do some not so simple optimizations.

        Two rounds of a mark pass followed by a peephole rewrite pass are
        run over ``self.instructions``; afterwards local registers are
        renumbered and the resulting register count is stored in
        ``self.numlocals``. ``self.instructions`` is replaced by the
        optimized list.
        """
        if not self.instructions:
            # The passes below seed a sliding window with the first
            # instruction, so there is nothing to do for an empty body.
            self.numlocals = len(self.local_names)
            return

        for _ in range(2):
            jumps = {}

            # instructions to remove
            remv_inst = set()

            # register -> set([prev, curr])
            remv_regs = {}

            institer = iter(self.instructions)
            instructions = [next(institer)]

            # First pass - mark anything needed for the second pass.
            for newinst in institer:
                instructions.append(newinst)
                prev, curr = instructions[-2:]

                # Gather all jumps by their respective labels.
                if curr.jumplike:
                    jumps.setdefault(curr.labelname, []).append(curr)

                S = set((prev, curr))

                # Detect if there are any references to this register other than
                # the setlocal/getlocal sequence here.
                if prev.name in SET_LOCALS and curr.name in GET_LOCALS and prev.argument == curr.argument:
                    remv_inst.update(S)
                    remv_regs[curr.argument] = S

                # PyPy-specific optimization: some opcodes have an unnecessary
                # StoreResult at the end, so callpropvoid and some setproperty's
                # have a pushnull and an unused register afterwards. Stop that.
                elif prev.name in ("pushnull", "pushundefined") and curr.name in SET_LOCALS:
                    remv_inst.update(S)
                    remv_regs[curr.argument] = S

                elif curr.name in GET_LOCALS:
                    # The register is read somewhere else, so the pair that
                    # was marked for it has to stay.
                    if curr.argument in remv_regs:
                        remv_inst -= remv_regs[curr.argument]

                elif curr.name in SET_LOCALS:
                    # Another store to this register: forget the candidate
                    # pair recorded for it (presumably because a new value
                    # lifetime starts here).
                    remv_regs.pop(curr.argument, None)

                elif curr.name == "kill":
                    # If we're going to remove this register, mark the kill for deletion too.
                    if curr.argument in remv_regs:
                        remv_inst.add(curr)

            institer = iter(self.instructions)
            instructions = [next(institer)]
            # Second pass
            for newinst in institer:
                instructions.append(newinst)
                keep_going = True
                while keep_going:
                    if len(instructions) < 2:
                        # Earlier removals shrank the window below a pair;
                        # wait for the next instruction before matching.
                        break

                    prev, curr = instructions[-2:]
                    test = prev.name, curr.name

                    # Prevent errors with jumps and lone labels.
                    if curr.name == "label" and curr.labelname not in jumps:
                        instructions = instructions[:-1]

                    # Branch optimizations geared for PyPy.
                    elif test in BRANCH_OPTIMIZE:
                        instructions = instructions[:-2]
                        new = get_instruction(BRANCH_OPTIMIZE[test])(curr.labelname)
                        jumps[curr.labelname].remove(curr)
                        jumps[curr.labelname].append(new)
                        instructions.append(new)

                    # Two opcodes in a row that do nothing should be removed.
                    elif prev.name in GET_LOCALS and curr.name in SET_LOCALS and prev.argument == curr.argument:
                        instructions = instructions[:-2]

                    # jump then label -> remove the jump.
                    elif test == ("jump", "label") and prev.labelname == curr.labelname:
                        # Don't remove the label, we may need it for a backref later on.
                        instructions.pop(-2)
                        jumps[prev.labelname].remove(prev)

                    # return after return -> remove the second return.
                    elif prev.name in ("returnvalue", "returnvoid") and curr.name in ("returnvalue", "returnvoid"):
                        instructions.pop()

                    # label then jump -> remove both and rename.
                    elif test == ("label", "jump"):
                        for jump in jumps[prev.labelname]:
                            jump.labelname = curr.labelname
                        jumps[curr.labelname].remove(curr)
                        jumps[curr.labelname].extend(jumps[prev.labelname])
                        del jumps[prev.labelname]
                        instructions = instructions[:-2]

                    elif curr in remv_inst:
                        instructions.pop()

                    else:
                        keep_going = False

            self.instructions = instructions

        # Third pass - pack in those registers.
        # local_names is the arguments to the method
        used_registers = dict((i, i) for i in range(len(self.local_names)))

        # Every instruction is visited exactly once and the rewritten
        # instruction goes into a fresh list, so the result of the earlier
        # passes is never overwritten in place.
        packed = []
        for curr in self.instructions:
            if curr.name in SET_LOCALS:
                # First store to a register allocates the next free slot.
                index = used_registers.setdefault(curr.argument, len(used_registers))
                curr = get_instruction("setlocal")(index)

            elif curr.name in GET_LOCALS:
                # Registers that were never remapped keep their number.
                curr = get_instruction("getlocal")(used_registers.get(curr.argument, curr.argument))

            elif curr.name == "kill":
                curr = get_instruction("kill")(used_registers.get(curr.argument, curr.argument))

            packed.append(curr)

        self.numlocals = len(used_registers)
        self.instructions = packed

    # ========================================
    # STACK DEPTH and SCOPE DEPTH tracking

    def get_stack_depth(self):
        """Return the current stack depth."""
        return self._stack_depth

    def set_stack_depth(self, value):
        """Set the current stack depth, raising the recorded maximum if exceeded."""
        self._stack_depth = value
        if value > self.max_stack_depth:
            self.max_stack_depth = value

    stack_depth = property(get_stack_depth, set_stack_depth)

    def get_scope_depth(self):
        """Return the current scope depth."""
        return self._scope_depth

    def set_scope_depth(self, value):
        """Set the current scope depth, raising the recorded maximum if exceeded."""
        self._scope_depth = value
        if value > self.max_scope_depth:
            self.max_scope_depth = value

    scope_depth = property(get_scope_depth, set_scope_depth)

    # ========================================
    # LOCAL tracking

    @property
    def next_free_local(self):
        """
        Return the index of the next empty local.
        """
        return self.locals.next_free()

    def set_local(self, name):
        """
        Mark the register named "name" as set and return
        the index. The maximum local count is raised if the pool grew
        past it.
        """
        index = self.locals.index_for(name)
        if self.local_count > self.max_local_count:
            self.max_local_count = self.local_count
        return index

    def get_local(self, name):
        """
        Return the index for the local named "name".
        """
        return self.locals.get_index(name)

    def kill_local(self, name):
        """
        Mark the register named "name" as free and return
        the index.
        """
        index = self.locals.kill(name)
        if self.local_count > self.max_local_count:
            self.max_local_count = self.local_count
        return index

    def has_local(self, name):
        """
        Returns True if we have a register named "name" in the current
        assembler context.
        """
        return name in self.locals

    @property
    def local_count(self):
        """
        The current local count.
        """
        return len(self.locals)

    def dump_instructions(self, exceptions=(), use_label_names=False):
        """
        Dump this assembler's instructions to a string, one instruction
        per line, each prefixed with its offset.

        "exceptions" is an iterable of objects with "from_", "to_",
        "exc_type" and "target" attributes; a marker line is emitted
        before the instruction at each "from_" and "to_" offset.

        If "use_label_names" is True, then the label names will be used
        when dumping, otherwise label names will be generated. Label
        names are not kept in compiled code, so it makes sense to set
        this to False when dumping parsed code.

        Note that this runs assembler pass 1 on every instruction and
        rewrites the "labelname" of jump-like instructions to the name
        that was printed.
        """

        # label name => remapped label name
        lblmap = {}

        # address => [exception, ...]
        exc_from = {}
        exc_to = {}
        for exc in exceptions:
            exc_from.setdefault(exc.from_, []).append(exc)
            exc_to.setdefault(exc.to_, []).append(exc)

        for inst in self.instructions:
            inst.assembler_pass1(self)
            if inst.label and not use_label_names:
                # A label is usually seen several times (its definition
                # plus every jump to it). Number it only on first sight so
                # that two different labels can never share a generated name.
                name = inst.label.name
                if name not in lblmap:
                    lblmap[name] = "L%d" % (len(lblmap) + 1)

        dump, offset = [], 0
        for inst in self.instructions:

            if inst.label:
                lblname = inst.label.name
                lblname = lblmap.get(lblname, lblname)
                if inst.jumplike:
                    # we're jumping to the label -- get us a mapped label
                    inst.labelname = lblname
                else:
                    # we're defining a label
                    dump.append("\n%s:" % (lblname,))

            for exc in exc_from.get(offset, []):
                dump.append("<%s %d" % (exc.exc_type, exc.target))
            for exc in exc_to.get(offset, []):
                dump.append(">%s %d" % (exc.exc_type, exc.target))

            dump.append("%d\t%s" % (offset, inst))
            offset += len(inst)

        return "\n".join(dump)

    def pass1(self):
        """
        Do assembler pass 1.
        """
        # Pass 1.
        for inst in self.instructions:
            inst.assembler_pass1(self)

    def write_constants(self, pool):
        """Hand every instruction to ``pool.write``, in order."""
        for inst in self.instructions:
            pool.write(inst)

    def serialize(self):
        """
        Serialize this code to a string, and also
        resolve any jump offsets.
        """
        code = StringIO()
        # Pass 2. Generate code.
        for inst in self.instructions:
            inst.assembler_pass2(self, code.tell())
            code.write(inst.serialize())

        # Patch up jumps.
        for inst in self.jumps:
            assert inst in self.instructions
            # The offset is written one byte past the instruction's address
            # and is relative to address + 4 (presumably a 1-byte opcode
            # followed by a 3-byte offset -- confirm against the
            # instruction encoding).
            code.seek(inst.address + 1)
            code.write(inst.label.relative_offset(inst.address + 4))
        return code.getvalue()

    @classmethod
    def parse(cls, bitstream, abc, constants, local_count):
        """
        Build an assembler by parsing instructions from ``bitstream``.

        Locals are named ``_loc0`` .. ``_loc<local_count - 1>``. A U32
        code length is read first and instructions are parsed until the
        stream position reaches the end of that region.
        """
        asm = cls(["_loc%d" % (i,) for i in range(local_count)])
        codelen = bitstream.read(U32)
        # codelen is multiplied by 8 because tell() presumably reports a
        # position in bits -- confirm against the bitstream class.
        finish = bitstream.tell() + codelen * 8
        while bitstream.tell() < finish:
            asm.add_instruction(parse_instruction(bitstream, abc, constants, asm))
        return asm