Example #1
0
    def unroll_loop_iterations(self, loop, unroll_count):
        """ Unroll the loop X times. unroll_count + 1 = unroll_factor

        The body of `loop` is copied `unroll_count` times. The copies are
        appended after the original operations and stored back into
        `loop.operations`; the arguments of `loop.jump` are rewritten in
        place. Nothing is returned.

        Operations whose opnum is listed in `prohibit_opnums` are never
        copied, so they only occur in the first (original) iteration.
        """
        renamer = Renamer()
        operations = loop.operations
        label = loop.label
        jump = loop.jump
        unrolled = []
        prohibit_opnums = (rop.GUARD_FUTURE_CONDITION,
                           rop.GUARD_NOT_INVALIDATED)
        # it is assumed that #label_args == #jump_args
        label_arg_count = len(jump.getarglist())
        for _ in range(unroll_count):
            # fill the map with the renaming boxes. keys are boxes from the
            # label. The jump argument is renamed first, so from the second
            # copy on it refers to the box of the previous copy.
            for i in range(label_arg_count):
                la = label.getarg(i)
                ja = renamer.rename_box(jump.getarg(i))
                if la != ja:
                    renamer.start_renaming(la, ja)
            #
            for op in operations:
                if op.getopnum() in prohibit_opnums:
                    continue # do not unroll this operation twice
                copied_op = copy_resop(op)
                if not copied_op.returns_void():
                    # every result assigns a new box, thus creates an entry
                    # to the rename map.
                    renamer.start_renaming(op, copied_op)
                #
                for a, arg in enumerate(copied_op.getarglist()):
                    copied_op.setarg(a, renamer.rename_box(arg))
                # not only the arguments, but also the fail args need
                # to be adjusted. rd_snapshot stores the live variables
                # that are needed to resume.
                if copied_op.is_guard():
                    self.copy_guard_descr(renamer, copied_op)
                #
                unrolled.append(copied_op)

        # the jump arguments have been changed
        # if label(iX) ... jump(i(X+1)) is called, at the next unrolled loop
        # must look like this: label(i(X+1)) ... jump(i(X+2))
        for i, arg in enumerate(jump.getarglist()):
            jump.setarg(i, renamer.rename_box(arg))
        #
        loop.operations = operations + unrolled
Example #2
0
class SchedulerState(object):
    """ Bookkeeping used while the operations of a dependency graph are
        scheduled.

        Emitted operations are collected in `oplist`. Nodes that are pure
        are not emitted right away but delayed; they are only emitted once
        an emitted operation (or the final jump) actually needs them. """

    def __init__(self, cpu, graph):
        self.cpu = cpu
        self.renamer = Renamer()
        self.graph = graph
        # operations in the order they have been emitted
        self.oplist = []
        # nodes that are ready to be scheduled, kept sorted by mark_emitted
        self.worklist = []
        self.invariant_oplist = []
        self.invariant_vector_vars = []
        # operations that already have been emitted (dict used as a set)
        self.seen = {}
        # delayed nodes that no other node provides to; post_schedule
        # resolves them against the jump
        self.delayed = []

    def resolve_delayed(self, needs_resolving, delayed, op):
        """ Emit the nodes in `delayed` that `op` depends on.

            needs_resolving: dict (used as a set) of boxes that still must
                             be emitted; it is filled and emptied in place
            delayed:         list of delayed nodes; resolved entries are
                             removed from it in place
            op:              the operation whose arguments (and fail
                             arguments, if it is a guard) are needed """
        # recursive solving of all delayed objects
        if not delayed:
            return
        args = op.getarglist()
        if op.is_guard():
            args = args[:] + op.getfailargs()
        for arg in args:
            if arg is None or arg.is_constant() or arg.is_inputarg():
                continue
            if arg not in self.seen:
                box = self.renamer.rename_box(arg)
                needs_resolving[box] = None

        indexvars = self.graph.index_vars
        # walk backwards, entries are deleted while iterating
        i = len(delayed) - 1
        while i >= 0:
            node = delayed[i]
            op = node.getoperation()
            if op in needs_resolving:
                # either it is a normal operation, or we know that there is a linear combination
                del needs_resolving[op]
                if op in indexvars:
                    opindexvar = indexvars[op]
                    # there might be a variable already, that
                    # calculated the index variable, thus just reuse it
                    for var, indexvar in indexvars.items():
                        if indexvar == opindexvar and var in self.seen:
                            self.renamer.start_renaming(op, var)
                            break
                    else:
                        if opindexvar.calculated_by(op):
                            # just append this operation
                            self.seen[op] = None
                            self.append_to_oplist(op)
                        else:
                            # here is an easier way to calculate just this operation
                            last = op
                            for operation in opindexvar.get_operations():
                                self.append_to_oplist(operation)
                                last = operation
                            indexvars[last] = opindexvar
                            self.renamer.start_renaming(op, last)
                            self.seen[op] = None
                            self.seen[last] = None
                else:
                    # the dependencies of op must be emitted before op
                    self.resolve_delayed(needs_resolving, delayed, op)
                    self.append_to_oplist(op)
                    self.seen[op] = None
                if len(delayed) > i:
                    del delayed[i]
            i -= 1
            # some times the recursive call can remove several items from delayed,
            # thus we correct the index here
            if len(delayed) <= i:
                i = len(delayed) - 1

    def append_to_oplist(self, op):
        """ Apply the pending renamings to `op` and emit it. """
        self.renamer.rename(op)
        self.oplist.append(op)

    def schedule(self):
        """ Run the scheduler and store the result in the loop of the graph. """
        self.prepare()
        Scheduler().walk_and_emit(self)
        self.post_schedule()

    def post_schedule(self):
        """ Resolve what is still delayed against the jump, rename the jump
            and replace the operations of the loop with the emitted ones. """
        loop = self.graph.loop
        jump = loop.jump
        if self.delayed:
            # some operations can be delayed until the jump instruction,
            # handle them here
            self.resolve_delayed({}, self.delayed, jump)
        self.renamer.rename(jump)
        loop.operations = self.oplist

    def profitable(self):
        return True

    def prepare(self):
        """ Seed the worklist with every node that has no dependency.
            Nodes are inserted at the front, thus end up in reverse
            order of `graph.nodes`. """
        for node in self.graph.nodes:
            if node.depends_count() == 0:
                self.worklist.insert(0, node)

    def try_emit_or_delay(self, node):
        """ Delay `node` if it is pure (and not imaginary), emit it
            otherwise. """
        if not node.is_imaginary() and node.is_pure():
            # this operation might never be emitted. only if it is really needed
            self.delay_emit(node)
            return
        # emit a now!
        self.pre_emit(node, True)
        self.mark_emitted(node)
        if not node.is_imaginary():
            op = node.getoperation()
            self.seen[op] = None
            self.append_to_oplist(op)

    def delay_emit(self, node):
        """ it has been decided that the operation might be scheduled later """
        delayed = node.delayed or []
        if node not in delayed:
            delayed.append(node)
        node.delayed = None
        provides = node.provides()
        if len(provides) == 0:
            # nobody consumes this node, keep it around until the jump
            self.delayed.extend(delayed)
        else:
            for to in provides:
                tnode = to.target_node()
                # every consumer gets its own copy of the list
                self.delegate_delay(tnode, delayed[:])
        self.mark_emitted(node)

    def delegate_delay(self, node, delayed):
        """ Chain up delays, this can reduce many more of the operations """
        if node.delayed is None:
            node.delayed = delayed
        else:
            delayedlist = node.delayed
            for d in delayed:
                if d not in delayedlist:
                    delayedlist.append(d)

    # NOTE the receiver is called `state` instead of `self` here; the name
    # is kept as it is part of the signature
    def mark_emitted(state, node, unpack=True):
        """ An operation has been emitted, adds new operations to the worklist
            whenever their dependency count drops to zero.
            Keeps worklist sorted (see priority) """
        worklist = state.worklist
        provides = node.provides()[:]
        for dep in provides:  # COPY
            target = dep.to
            node.remove_edge_to(target)
            if not target.emitted and target.depends_count() == 0:
                # sorts them by priority
                i = len(worklist) - 1
                while i >= 0:
                    cur = worklist[i]
                    c = (cur.priority - target.priority)
                    if c < 0:  # meaning itnode.priority < target.priority:
                        worklist.insert(i + 1, target)
                        break
                    elif c == 0:
                        # if they have the same priority, sort them
                        # using the original position in the trace
                        if target.getindex() < cur.getindex():
                            worklist.insert(i + 1, target)
                            break
                    i -= 1
                else:
                    # no suitable position found, put it to the front
                    worklist.insert(0, target)
        node.clear_dependencies()
        node.emitted = True
        if not node.is_imaginary():
            op = node.getoperation()
            state.renamer.rename(op)
            if unpack:
                state.ensure_args_unpacked(op)
            state.post_emit(node)

    def delay(self, node):
        return False

    def has_more(self):
        """ True as long as there are nodes left in the worklist. """
        return len(self.worklist) > 0

    def ensure_args_unpacked(self, op):
        # hook, does nothing in this class
        pass

    def post_emit(self, node):
        # hook, does nothing in this class
        pass

    def pre_emit(self, orignode, pack_first=True):
        """ Handle the nodes that have been delayed for `orignode` right
            before it is emitted.

            If `pack_first` is set, the delayed nodes `orignode` depends on
            are emitted. Whatever is not emitted afterwards is passed on,
            either to the nodes it provides to or to `self.delayed`. """
        delayed = orignode.delayed
        if delayed:
            # there are some nodes that have been delayed just for this operation
            if pack_first:
                op = orignode.getoperation()
                self.resolve_delayed({}, delayed, op)

            for node in delayed:
                # must be checked before node is used the first time
                if node is None:
                    continue
                op = node.getoperation()
                if op in self.seen:
                    continue
                provides = node.provides()
                if len(provides) == 0:
                    # add this node to the final delay list
                    # might be emitted before jump!
                    self.delayed.append(node)
                else:
                    for to in provides:
                        tnode = to.target_node()
                        self.delegate_delay(tnode, [node])
            orignode.delayed = None
Example #3
0
    def unroll_loop_iterations(self, loop, unroll_count, align_unroll_once=False):
        """ Unroll the loop `unroll_count` times. There can be an additional unroll step
            if alignment might benefit

            The copies of the loop body are created with renamed boxes and
            the arguments of `loop.jump` are rewritten in place. Operations
            whose opnum is listed in `prohibit_opnums` are never copied.

            Without `align_unroll_once` the copies are appended to the
            original operations. With `align_unroll_once` one more copy is
            created, the original operations are stored in
            `loop.align_operations`, `loop.operations` only holds the copies
            and `loop.label` is replaced by a new label that carries the
            boxes as renamed after the first copy. """
        renamer = Renamer()
        operations = loop.operations
        prohibit_opnums = (rop.GUARD_FUTURE_CONDITION,
                           rop.GUARD_NOT_INVALIDATED,
                           rop.DEBUG_MERGE_POINT)
        unrolled = []

        if align_unroll_once:
            unroll_count += 1

        label = loop.label
        jump = loop.jump
        new_label = label
        # it is assumed that #label_args == #jump_args
        label_arg_count = len(jump.getarglist())
        for u in range(unroll_count):
            # fill the map with the renaming boxes. keys are boxes from the
            # label. The jump argument is renamed first, so from the second
            # copy on it refers to the box of the previous copy.
            for i in range(label_arg_count):
                la = label.getarg(i)
                ja = renamer.rename_box(jump.getarg(i))
                if la != ja:
                    renamer.start_renaming(la, ja)
            #
            for op in operations:
                if op.getopnum() in prohibit_opnums:
                    continue # do not unroll this operation twice
                copied_op = copy_resop(op)
                if not copied_op.returns_void():
                    # every result assigns a new box, thus creates an entry
                    # to the rename map.
                    renamer.start_renaming(op, copied_op)
                #
                for a, arg in enumerate(copied_op.getarglist()):
                    copied_op.setarg(a, renamer.rename_box(arg))
                # not only the arguments, but also the fail args need
                # to be adjusted. rd_snapshot stores the live variables
                # that are needed to resume.
                if copied_op.is_guard():
                    self.copy_guard_descr(renamer, copied_op)
                #
                unrolled.append(copied_op)
            #
            if align_unroll_once and u == 0:
                # the label for the unrolled part is built from the
                # arguments of the old label, renamed as of the first copy
                descr = label.getdescr()
                args = label.getarglist()[:]
                new_label = ResOperation(rop.LABEL, args, descr)
                renamer.rename(new_label)
            #

        # the jump arguments have been changed
        # if label(iX) ... jump(i(X+1)) is called, at the next unrolled loop
        # must look like this: label(i(X+1)) ... jump(i(X+2))
        for i, arg in enumerate(jump.getarglist()):
            jump.setarg(i, renamer.rename_box(arg))
        #
        loop.label = new_label
        if align_unroll_once:
            loop.align_operations = operations
            loop.operations = unrolled
        else:
            loop.operations = operations + unrolled
Example #4
0
class SchedulerState(object):
    """ Bookkeeping used while the operations of a dependency graph are
        scheduled.

        Emitted operations are collected in `oplist`. Nodes that are pure
        are not emitted right away but delayed; they are only emitted once
        an emitted operation (or the final jump) actually needs them. """

    def __init__(self, cpu, graph):
        self.cpu = cpu
        self.renamer = Renamer()
        self.graph = graph
        # operations in the order they have been emitted
        self.oplist = []
        # nodes that are ready to be scheduled, kept sorted by mark_emitted
        self.worklist = []
        self.invariant_oplist = []
        self.invariant_vector_vars = []
        # operations that already have been emitted (dict used as a set)
        self.seen = {}
        # delayed nodes that no other node provides to; post_schedule
        # resolves them against the jump
        self.delayed = []

    def resolve_delayed(self, needs_resolving, delayed, op):
        """ Emit the nodes in `delayed` that `op` depends on.

            needs_resolving: dict (used as a set) of boxes that still must
                             be emitted; it is filled and emptied in place
            delayed:         list of delayed nodes; resolved entries are
                             removed from it in place
            op:              the operation whose arguments (and fail
                             arguments, if it is a guard) are needed """
        # recursive solving of all delayed objects
        if not delayed:
            return
        args = op.getarglist()
        if op.is_guard():
            args = args[:] + op.getfailargs()
        for arg in args:
            if arg is None or arg.is_constant() or arg.is_inputarg():
                continue
            if arg not in self.seen:
                box = self.renamer.rename_box(arg)
                needs_resolving[box] = None

        indexvars = self.graph.index_vars
        # walk backwards, entries are deleted while iterating
        i = len(delayed)-1
        while i >= 0:
            node = delayed[i]
            op = node.getoperation()
            if op in needs_resolving:
                # either it is a normal operation, or we know that there is a linear combination
                del needs_resolving[op]
                if op in indexvars:
                    opindexvar = indexvars[op]
                    # there might be a variable already, that
                    # calculated the index variable, thus just reuse it
                    for var, indexvar in indexvars.items():
                        if indexvar == opindexvar and var in self.seen:
                            self.renamer.start_renaming(op, var)
                            break
                    else:
                        if opindexvar.calculated_by(op):
                            # just append this operation
                            self.seen[op] = None
                            self.append_to_oplist(op)
                        else:
                            # here is an easier way to calculate just this operation
                            last = op
                            for operation in opindexvar.get_operations():
                                self.append_to_oplist(operation)
                                last = operation
                            indexvars[last] = opindexvar
                            self.renamer.start_renaming(op, last)
                            self.seen[op] = None
                            self.seen[last] = None
                else:
                    # the dependencies of op must be emitted before op
                    self.resolve_delayed(needs_resolving, delayed, op)
                    self.append_to_oplist(op)
                    self.seen[op] = None
                if len(delayed) > i:
                    del delayed[i]
            i -= 1
            # some times the recursive call can remove several items from delayed,
            # thus we correct the index here
            if len(delayed) <= i:
                i = len(delayed)-1

    def append_to_oplist(self, op):
        """ Apply the pending renamings to `op` and emit it. """
        self.renamer.rename(op)
        self.oplist.append(op)

    def schedule(self):
        """ Run the scheduler and store the result in the loop of the graph. """
        self.prepare()
        Scheduler().walk_and_emit(self)
        self.post_schedule()

    def post_schedule(self):
        """ Resolve what is still delayed against the jump, rename the jump
            and replace the operations of the loop with the emitted ones. """
        loop = self.graph.loop
        jump = loop.jump
        if self.delayed:
            # some operations can be delayed until the jump instruction,
            # handle them here
            self.resolve_delayed({}, self.delayed, jump)
        self.renamer.rename(jump)
        loop.operations = self.oplist

    def profitable(self):
        return True

    def prepare(self):
        """ Seed the worklist with every node that has no dependency.
            Nodes are inserted at the front, thus end up in reverse
            order of `graph.nodes`. """
        for node in self.graph.nodes:
            if node.depends_count() == 0:
                self.worklist.insert(0, node)

    def try_emit_or_delay(self, node):
        """ Delay `node` if it is pure (and not imaginary), emit it
            otherwise. """
        if not node.is_imaginary() and node.is_pure():
            # this operation might never be emitted. only if it is really needed
            self.delay_emit(node)
            return
        # emit a now!
        self.pre_emit(node, True)
        self.mark_emitted(node)
        if not node.is_imaginary():
            op = node.getoperation()
            self.seen[op] = None
            self.append_to_oplist(op)

    def delay_emit(self, node):
        """ it has been decided that the operation might be scheduled later """
        delayed = node.delayed or []
        if node not in delayed:
            delayed.append(node)
        node.delayed = None
        provides = node.provides()
        if len(provides) == 0:
            # nobody consumes this node, keep it around until the jump
            self.delayed.extend(delayed)
        else:
            for to in provides:
                tnode = to.target_node()
                # every consumer gets its own copy of the list
                self.delegate_delay(tnode, delayed[:])
        self.mark_emitted(node)

    def delegate_delay(self, node, delayed):
        """ Chain up delays, this can reduce many more of the operations """
        if node.delayed is None:
            node.delayed = delayed
        else:
            delayedlist = node.delayed
            for d in delayed:
                if d not in delayedlist:
                    delayedlist.append(d)

    # NOTE the receiver is called `state` instead of `self` here; the name
    # is kept as it is part of the signature
    def mark_emitted(state, node, unpack=True):
        """ An operation has been emitted, adds new operations to the worklist
            whenever their dependency count drops to zero.
            Keeps worklist sorted (see priority) """
        worklist = state.worklist
        provides = node.provides()[:]
        for dep in provides: # COPY
            target = dep.to
            node.remove_edge_to(target)
            if not target.emitted and target.depends_count() == 0:
                # sorts them by priority
                i = len(worklist)-1
                while i >= 0:
                    cur = worklist[i]
                    c = (cur.priority - target.priority)
                    if c < 0: # meaning itnode.priority < target.priority:
                        worklist.insert(i+1, target)
                        break
                    elif c == 0:
                        # if they have the same priority, sort them
                        # using the original position in the trace
                        if target.getindex() < cur.getindex():
                            worklist.insert(i+1, target)
                            break
                    i -= 1
                else:
                    # no suitable position found, put it to the front
                    worklist.insert(0, target)
        node.clear_dependencies()
        node.emitted = True
        if not node.is_imaginary():
            op = node.getoperation()
            state.renamer.rename(op)
            if unpack:
                state.ensure_args_unpacked(op)
            state.post_emit(node)

    def delay(self, node):
        return False

    def has_more(self):
        """ True as long as there are nodes left in the worklist. """
        return len(self.worklist) > 0

    def ensure_args_unpacked(self, op):
        # hook, does nothing in this class
        pass

    def post_emit(self, node):
        # hook, does nothing in this class
        pass

    def pre_emit(self, orignode, pack_first=True):
        """ Handle the nodes that have been delayed for `orignode` right
            before it is emitted.

            If `pack_first` is set, the delayed nodes `orignode` depends on
            are emitted. Whatever is not emitted afterwards is passed on,
            either to the nodes it provides to or to `self.delayed`. """
        delayed = orignode.delayed
        if delayed:
            # there are some nodes that have been delayed just for this operation
            if pack_first:
                op = orignode.getoperation()
                self.resolve_delayed({}, delayed, op)

            for node in delayed:
                # must be checked before node is used the first time
                if node is None:
                    continue
                op = node.getoperation()
                if op in self.seen:
                    continue
                provides = node.provides()
                if len(provides) == 0:
                    # add this node to the final delay list
                    # might be emitted before jump!
                    self.delayed.append(node)
                else:
                    for to in provides:
                        tnode = to.target_node()
                        self.delegate_delay(tnode, [node])
            orignode.delayed = None