예제 #1
0
    def vectoroptimizer_unrolled(self, loop, unroll_factor=-1):
        opt = self.vectoroptimizer(loop)
        opt.linear_find_smallest_type(loop)
        loop.setup_vectorization()
        if unroll_factor == -1 and opt.smallest_type_bytes == 0:
            raise NotAVectorizeableLoop()
        if unroll_factor == -1:
            unroll_factor = opt.get_unroll_count(ARCH_VEC_REG_SIZE)
            print ""
            print "unroll factor: ", unroll_factor, opt.smallest_type_bytes
        self.show_dot_graph(DependencyGraph(loop),
                            "original_" + self.test_name)
        graph = opt.analyse_index_calculations(loop)
        if graph is not None:
            cycle = graph.cycles()
            if cycle is not None:
                print "CYCLE found %s" % cycle
            self.show_dot_graph(graph, "early_exit_" + self.test_name)
            assert cycle is None
            state = SchedulerState(graph)
            opt.schedule(state)
        opt.unroll_loop_iterations(loop, unroll_factor)
        self.debug_print_operations(loop)
        graph = DependencyGraph(loop)
        self.last_graph = graph  # legacy for test_dependency
        self.show_dot_graph(graph, self.test_name)

        def gmr(i):
            return graph.memory_refs[graph.nodes[i]]

        graph.getmemref = gmr
        return opt, graph
예제 #2
0
 def vectoroptimizer_unrolled(self, loop, unroll_factor = -1):
     opt = self.vectoroptimizer(loop)
     opt.linear_find_smallest_type(loop)
     loop.setup_vectorization()
     if unroll_factor == -1 and opt.smallest_type_bytes == 0:
         raise NotAVectorizeableLoop()
     if unroll_factor == -1:
         unroll_factor = opt.get_unroll_count(ARCH_VEC_REG_SIZE)
         print ""
         print "unroll factor: ", unroll_factor, opt.smallest_type_bytes
     self.show_dot_graph(DependencyGraph(loop), "original_" + self.test_name)
     graph = opt.analyse_index_calculations(loop)
     if graph is not None:
         cycle = graph.cycles()
         if cycle is not None:
             print "CYCLE found %s" % cycle
         self.show_dot_graph(graph, "early_exit_" + self.test_name)
         assert cycle is None
         state = SchedulerState(graph)
         opt.schedule(state)
     opt.unroll_loop_iterations(loop, unroll_factor)
     self.debug_print_operations(loop)
     graph = DependencyGraph(loop)
     self.last_graph = graph # legacy for test_dependency
     self.show_dot_graph(graph, self.test_name)
     def gmr(i):
         return graph.memory_refs[graph.nodes[i]]
     graph.getmemref = gmr
     return opt, graph
예제 #3
0
 def build_dependency(self, ops):
     """Parse ``ops`` into a loop and return its DependencyGraph.

     The graph is handed to ``show_dot_graph`` under ``self.test_name``,
     every node is asserted to be independent of itself, and the source
     text is stored on the graph as ``parsestr``.
     """
     dep_graph = DependencyGraph(self.parse_loop(ops))
     self.show_dot_graph(dep_graph, self.test_name)
     for candidate in dep_graph.nodes:
         assert candidate.independent(candidate)
     # keep the source text on the graph
     dep_graph.parsestr = ops
     return dep_graph
예제 #4
0
 def build_dependency(self, ops):
     """Build the DependencyGraph for the trace described by ``ops``.

     ``ops`` is passed to ``self.parse_loop``. The resulting graph is shown
     via ``show_dot_graph``, each node is checked with
     ``node.independent(node)``, and ``ops`` is attached as
     ``graph.parsestr`` before the graph is returned.
     """
     parsed_loop = self.parse_loop(ops)
     result = DependencyGraph(parsed_loop)
     self.show_dot_graph(result, self.test_name)
     for current in result.nodes:
         # sanity check: a node must report independence from itself
         assert current.independent(current)
     result.parsestr = ops
     return result
예제 #5
0
파일: vector.py 프로젝트: Mu-L/pypy
    def run_optimization(self, metainterp_sd, info, loop, jitcell_token,
                         user_code):
        """Run the vectorization pipeline on ``loop``.

        Steps, in order: move index guards early and reschedule, unroll,
        build and combine the pack set, schedule the vectorized trace,
        strengthen guards, and reschedule once more.

        Raises NotAVectorizeableLoop when the vector size is zero, no
        smallest type was found, or ``loop.label`` is not a LABEL
        operation. Raises NotAProfitableLoop when the schedule state
        reports the result as not profitable.

        Returns ``loop.finaloplist(...)`` for ``jitcell_token``.
        """
        self.orig_label_args = loop.label.getarglist_copy()
        self.linear_find_smallest_type(loop)
        smallest_bytes = self.smallest_type_bytes
        vec_size = self.vector_ext.vec_size()
        # stop, there is no chance to vectorize this trace
        # we cannot optimize normal traces (if there is no label)
        if vec_size == 0:
            debug_print("vector size is zero")
            raise NotAVectorizeableLoop
        if smallest_bytes == 0:
            debug_print("could not find smallest type")
            raise NotAVectorizeableLoop
        if loop.label.getopnum() != rop.LABEL:
            debug_print("not a loop, can only vectorize loops")
            raise NotAVectorizeableLoop

        # find index guards and move to the earliest position
        index_graph = self.analyse_index_calculations(loop)
        if index_graph is not None:
            # reorder the trace
            self.schedule(SchedulerState(metainterp_sd.cpu, index_graph))

        # unroll
        self.unroll_count = self.get_unroll_count(vec_size)
        align_unroll = (self.unroll_count == 1 and
                        self.vector_ext.should_align_unroll)
        self.unroll_loop_iterations(loop, self.unroll_count,
                                    align_unroll_once=align_unroll)

        # vectorize
        vec_graph = DependencyGraph(loop)
        self.find_adjacent_memory_refs(vec_graph)
        self.extend_packset()
        self.combine_packset()
        cost_model = GenericCostModel(self.cpu, self.cost_threshold)
        vec_state = VecScheduleState(vec_graph, self.packset, self.cpu,
                                     cost_model)
        self.schedule(vec_state)
        if not vec_state.profitable():
            raise NotAProfitableLoop
        guard_opt = GuardStrengthenOpt(vec_graph.index_vars)
        guard_opt.propagate_all_forward(info, loop, user_code)

        # re-schedule the trace -> removes many pure operations
        final_state = SchedulerState(self.cpu, DependencyGraph(loop))
        final_state.schedule()

        info.extra_before_label = loop.align_operations
        for align_op in loop.align_operations:
            align_op.set_forwarded(None)

        return loop.finaloplist(jitcell_token=jitcell_token,
                                reset_label_token=False)
예제 #6
0
파일: vector.py 프로젝트: sota/pypy-old
    def run_optimization(self, info, loop):
        """Try to vectorize ``loop`` and return the graph's index variables.

        Raises NotAVectorizeableLoop when the CPU's vector register size
        is zero, no smallest type was found, or ``loop.label`` is not a
        LABEL operation. Raises NotAProfitableLoop when the schedule state
        reports the vectorized trace as not profitable.
        """
        self.orig_label_args = loop.label.getarglist_copy()
        self.linear_find_smallest_type(loop)
        smallest_bytes = self.smallest_type_bytes
        register_size = self.cpu.vector_register_size
        # stop, there is no chance to vectorize this trace
        # we cannot optimize normal traces (if there is no label)
        if register_size == 0:
            raise NotAVectorizeableLoop()
        if smallest_bytes == 0:
            raise NotAVectorizeableLoop()
        if loop.label.getopnum() != rop.LABEL:
            raise NotAVectorizeableLoop()

        # find index guards and move to the earliest position
        index_graph = self.analyse_index_calculations(loop)
        if index_graph is not None:
            # reorder the trace
            self.schedule(SchedulerState(index_graph))

        # unroll
        self.unroll_count = self.get_unroll_count(register_size)
        self.unroll_loop_iterations(loop, self.unroll_count)

        # vectorize
        vec_graph = DependencyGraph(loop)
        self.find_adjacent_memory_refs(vec_graph)
        self.extend_packset()
        self.combine_packset()
        # TODO move cost model to CPU
        cost_model = X86_CostModel(self.cpu, self.cost_threshold)
        vec_state = VecScheduleState(vec_graph, self.packset, self.cpu,
                                     cost_model)
        self.schedule(vec_state)
        if not vec_state.profitable():
            raise NotAProfitableLoop()
        return vec_graph.index_vars
예제 #7
0
    def vectorize(self, loop, unroll_factor=-1):
        """Vectorize ``loop`` in place and return the optimizer used.

        The loop is unrolled via ``vectoroptimizer_unrolled``, packed,
        scheduled with a GenericCostModel, guard-strengthened and then
        rescheduled. Finally ``loop.operations`` is rebuilt as
        prefix + optional prefix label + the scheduled operations.

        Raises NotAProfitableLoop when the cost model reports the result
        as not profitable.
        """
        loop_info = FakeLoopInfo(loop)
        loop_info.snapshot(loop)
        opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
        opt.find_adjacent_memory_refs(graph)
        opt.extend_packset()
        opt.combine_packset()
        cost_model = GenericCostModel(self.cpu, 0)
        vec_state = VecScheduleState(graph, opt.packset, self.cpu, cost_model)
        opt.schedule(vec_state)
        if not cost_model.profitable():
            raise NotAProfitableLoop()
        guard_opt = GuardStrengthenOpt(graph.index_vars)
        guard_opt.propagate_all_forward(loop_info, loop)
        #
        # re-schedule
        resched_state = SchedulerState(self.cpu, DependencyGraph(loop))
        resched_state.prepare()
        Scheduler().walk_and_emit(resched_state)
        resched_state.post_schedule()
        #
        scheduled_ops = loop.operations

        # put the prefix (and its label, if any) in front of the body
        loop.operations = loop.prefix[:]
        if loop.prefix_label:
            loop.operations.append(loop.prefix_label)
        loop.operations.extend(scheduled_ops)
        return opt
예제 #8
0
파일: test_guard.py 프로젝트: zcxowwww/pypy
 def optguards(self, loop, user_code=False):
     """Run guard strengthening on ``loop`` and return the optimizer.

     Every guard operation of the loop first receives a fresh
     ``compile.CompileLoopVersionDescr()`` as its descr. The
     GuardStrengthenOpt is created from the index variables of the loop's
     DependencyGraph.
     """
     loop_info = FakeLoopInfo(loop)
     loop_info.snapshot(loop)
     for operation in loop.operations:
         if not operation.is_guard():
             continue
         operation.setdescr(compile.CompileLoopVersionDescr())
     index_vars = DependencyGraph(loop).index_vars
     strengthener = GuardStrengthenOpt(index_vars)
     strengthener.propagate_all_forward(loop_info, loop, user_code)
     return strengthener
예제 #9
0
 def test_delayed_schedule(self):
     """Scheduling the two-int_add trace must leave a single operation.

     The trace computes ``int_add(i0,1)`` twice but only ``i2`` is passed
     to the jump.
     """
     trace_source = """
     [i0]
     i1 = int_add(i0,1)
     i2 = int_add(i0,1)
     jump(i2)
     """
     loop = self.parse(trace_source)
     loop.prefix_label = None
     loop.label = ResOperation(rop.LABEL, loop.inputargs)
     # split the trailing jump off the operation list
     parsed_ops = loop.operations
     loop.jump = parsed_ops[-1]
     loop.operations = parsed_ops[:-1]
     scheduler_state = SchedulerState(self.cpu, DependencyGraph(loop))
     scheduler_state.schedule()
     assert len(loop.operations) == 1
예제 #10
0
 def schedule(self, loop, unroll_factor=-1, with_guard_opt=False):
     """Unroll, pack and schedule ``loop``; return the optimizer used.

     The vectorized trace is scheduled with a FakeCostModel. When
     ``with_guard_opt`` is true, guard strengthening runs before the
     final re-scheduling pass.
     """
     loop_info = FakeLoopInfo(loop)
     loop_info.snapshot(loop)
     opt, graph = self.vectoroptimizer_unrolled(loop, unroll_factor)
     opt.find_adjacent_memory_refs(graph)
     opt.extend_packset()
     opt.combine_packset()
     cost_model = FakeCostModel(self.cpu)
     vec_state = VecScheduleState(graph, opt.packset, self.cpu, cost_model)
     opt.schedule(vec_state)
     if with_guard_opt:
         guard_opt = GuardStrengthenOpt(graph.index_vars)
         guard_opt.propagate_all_forward(loop_info, loop)
     # re-schedule
     resched_state = SchedulerState(self.cpu, DependencyGraph(loop))
     resched_state.prepare()
     Scheduler().walk_and_emit(resched_state)
     resched_state.post_schedule()
     return opt
예제 #11
0
 def savings(self, loop):
     jitdriver_sd = FakeJitDriverStaticData()
     opt = VectorizingOptimizer(self.metainterp_sd, jitdriver_sd, 0)
     opt.orig_label_args = loop.label.getarglist()[:]
     graph = opt.dependency_graph = DependencyGraph(loop)
     self.show_dot_graph(graph, 'costmodel')
     for k, m in graph.memory_refs.items():
         graph.memory_refs[k] = FakeMemoryRef(m.array, m.index_var)
     opt.find_adjacent_memory_refs(graph)
     opt.extend_packset()
     opt.combine_packset()
     for pack in opt.packset.packs:
         print "pack: \n   ",
         print '\n    '.join(
             [str(op.getoperation()) for op in pack.operations])
         print
     costmodel = FakeCostModel(GenericCostModel(self.cpu, 0))
     costmodel.reset_savings()
     state = VecScheduleState(graph, opt.packset, self.cpu, costmodel)
     opt.schedule(state)
     return costmodel.getsavings()
예제 #12
0
파일: vector.py 프로젝트: sota/pypy-old
    def analyse_index_calculations(self, loop):
        """ Tries to move guarding instructions and all the instructions that
            need to be computed for the guard to the loop header. This ensures
            that guards fail 'early' and relax dependencies. Without this
            step vectorization would not be possible!

            Returns the modified DependencyGraph if at least one guard
            could be moved, otherwise None.
        """
        graph = DependencyGraph(loop)
        # nodes that depend on nothing; whatever is still in here at the end
        # gets an edge from the early exit node
        zero_deps = {}
        for node in graph.nodes:
            if node.depends_count() == 0:
                zero_deps[node] = 0
        earlyexit = graph.imaginary_node("early exit")
        guards = graph.guards
        one_valid = False
        # NOTE(review): valid_guards is only appended to, never read in
        # this method
        valid_guards = []
        for guard_node in guards:
            modify_later = []
            # NOTE(review): last_prev_node is never read in this method
            last_prev_node = None
            valid = True
            if guard_node in zero_deps:
                del zero_deps[guard_node]
            for prev_dep in guard_node.depends():
                prev_node = prev_dep.to
                if prev_dep.is_failarg():
                    # remove this edge later.
                    # 1) only because of failing, this dependency exists
                    # 2) non pure operation points to this guard.
                    #    but if this guard only depends on pure operations, it can be checked
                    #    at an earlier position, the non pure op can execute later!
                    modify_later.append(prev_node)
                else:
                    # a path that is not always pure invalidates the guard,
                    # but the remaining paths are still visited and may
                    # still drop entries from zero_deps
                    for path in prev_node.iterate_paths(None, backwards=True, blacklist=True):
                        if not path.is_always_pure():
                            valid = False
                        else:
                            if path.last() in zero_deps:
                                del zero_deps[path.last()]
                    if not valid:
                        break
            if valid:
                # transformation is valid, modify the graph and execute
                # this guard earlier
                one_valid = True
                for node in modify_later:
                    node.remove_edge_to(guard_node)
                # every edge that starts in the guard, the early exit
                # inherits the edge and guard then provides to early exit
                # (iterate over a copy, edges are removed inside the loop)
                for dep in guard_node.provides()[:]:
                    assert not dep.target_node().is_imaginary()
                    earlyexit.edge_to(dep.target_node(), failarg=True)
                    guard_node.remove_edge_to(dep.target_node())
                valid_guards.append(guard_node)

                guard_node.edge_to(earlyexit)
                self.mark_guard(guard_node, loop)
        # the remaining dependency-free nodes are made to depend on the
        # early exit node
        for node in zero_deps.keys():
            assert not node.is_imaginary()
            earlyexit.edge_to(node)
        if one_valid:
            return graph
        return None