def __call__(self, definition_ir) -> Dict[str, Dict[str, str]]:
    """Lower ``definition_ir`` to CUIR and generate computation and bindings sources.

    The lowering chain is: definition IR -> GTIR (``GtirPipeline(...).full()``)
    -> OIR -> CUIR. The OIR pipeline is taken from the ``"oir_pipeline"``
    backend option; when the option is absent, a ``DefaultPipeline`` that
    skips ``NoFieldAccessPruning`` is used instead.

    Args:
        definition_ir: The definition IR handed to ``DefIRToGTIR.apply``.

    Returns:
        A two-level mapping: ``"computation"`` holds the generated
        ``"computation.hpp"`` source and ``"bindings"`` holds the generated
        ``"bindings.cu"`` source.
    """
    # Definition IR -> GTIR -> unoptimized OIR.
    lowered_gtir = GtirPipeline(DefIRToGTIR.apply(definition_ir)).full()
    unoptimized_oir = gtir_to_oir.GTIRToOIR().visit(lowered_gtir)

    # NOTE: the fallback pipeline is built eagerly, even when the user has
    # supplied an "oir_pipeline" option, because it is passed as the default
    # argument of ``dict.get``-style lookup.
    pipeline = self.backend.builder.options.backend_opts.get(
        "oir_pipeline",
        DefaultPipeline(skip=[NoFieldAccessPruning]),
    )
    optimized_oir = pipeline.run(unoptimized_oir)
    optimized_oir = FillFlushToLocalKCaches().visit(optimized_oir)

    # OIR -> CUIR, followed by the CUIR-level passes.
    cuda_ir = oir_to_cuir.OIRToCUIR().visit(optimized_oir)
    cuda_ir = kernel_fusion.FuseKernels().visit(cuda_ir)
    cuda_ir = extent_analysis.CacheExtents().visit(cuda_ir)

    # Both code generators receive the same formatting switch.
    should_format = self.backend.builder.options.format_source
    computation_source = cuir_codegen.CUIRCodegen.apply(
        cuda_ir,
        format_source=should_format,
    )
    bindings_source = GTCCudaBindingsCodegen.apply(
        cuda_ir,
        module_name=self.module_name,
        backend=self.backend,
        format_source=should_format,
    )

    return {
        "computation": {"computation.hpp": computation_source},
        "bindings": {"bindings.cu": bindings_source},
    }
def test_forward_backward_fusion():
    """Check that a FORWARD kernel followed by a BACKWARD kernel is fused.

    The first kernel writes ``tmp`` in two sections (the second one reads
    ``tmp`` at ``k - 1``); the second kernel copies ``tmp`` into ``out``
    without any offset. After ``FuseKernels`` a single kernel is expected
    whose vertical loops are the concatenation of both original kernels'
    vertical loops.
    """
    # Section ending at level 0 / offset 1: tmp = inp.
    head_section = VerticalLoopSectionFactory(
        end__level=0,
        end__offset=1,
        horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="tmp",
            right__name="inp",
        ),
    )
    # Section starting at offset 1: tmp = tmp[k - 1].
    tail_section = VerticalLoopSectionFactory(
        start__offset=1,
        horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="tmp",
            right__name="tmp",
            right__offset__k=-1,
        ),
    )
    forward_kernel = KernelFactory(
        vertical_loops__0=VerticalLoopFactory(
            loop_order=LoopOrder.FORWARD,
            sections=[head_section, tail_section],
        )
    )

    # Backward loop: out = tmp, read without offset.
    backward_kernel = KernelFactory(
        vertical_loops__0=VerticalLoopFactory(
            loop_order=LoopOrder.BACKWARD,
            sections__0__horizontal_executions__0__body__0=AssignStmtFactory(
                left__name="out",
                right__name="tmp",
            ),
        )
    )

    testee = ProgramFactory(kernels=[forward_kernel, backward_kernel])
    transformed = kernel_fusion.FuseKernels().visit(testee)

    assert len(transformed.kernels) == 1

    expected_loops = (
        testee.kernels[0].vertical_loops + testee.kernels[1].vertical_loops
    )
    assert transformed.kernels[0].vertical_loops == expected_loops
def test_no_fusion_with_parallel_offsets():
    """Check that kernels stay separate when the consumer reads ``tmp`` with an offset.

    Two programs are exercised:

    1. A FORWARD kernel producing ``tmp`` followed by a BACKWARD kernel that
       reads ``tmp`` with ``i`` offset 1.
    2. Two kernels with factory-default loops where the second reads ``tmp``
       with ``k`` offset 1.

    In both cases ``FuseKernels`` is expected to leave two kernels.
    """
    # --- Scenario 1: consumer reads tmp at i + 1 -------------------------
    head_section = VerticalLoopSectionFactory(
        end__level=0,
        end__offset=1,
        horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="tmp",
            right__name="inp",
        ),
    )
    tail_section = VerticalLoopSectionFactory(
        start__offset=1,
        horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="tmp",
            right__name="tmp",
            right__offset__k=-1,
        ),
    )
    forward_producer = KernelFactory(
        vertical_loops__0=VerticalLoopFactory(
            loop_order=LoopOrder.FORWARD,
            sections=[head_section, tail_section],
        )
    )
    backward_consumer = KernelFactory(
        vertical_loops__0=VerticalLoopFactory(
            loop_order=LoopOrder.BACKWARD,
            sections__0__horizontal_executions__0__body__0=AssignStmtFactory(
                left__name="out",
                right__name="tmp",
                right__offset__i=1,
            ),
        )
    )
    horizontal_offset_program = ProgramFactory(
        kernels=[forward_producer, backward_consumer]
    )
    fused = kernel_fusion.FuseKernels().visit(horizontal_offset_program)
    assert len(fused.kernels) == 2

    # --- Scenario 2: consumer reads tmp at k + 1 -------------------------
    producer = KernelFactory(
        vertical_loops__0__sections__0__horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="tmp",
            right__name="inp",
        )
    )
    consumer = KernelFactory(
        vertical_loops__0__sections__0__horizontal_executions__0__body__0=AssignStmtFactory(
            left__name="out",
            right__name="tmp",
            right__offset__k=1,
        )
    )
    vertical_offset_program = ProgramFactory(kernels=[producer, consumer])
    fused = kernel_fusion.FuseKernels().visit(vertical_offset_program)
    assert len(fused.kernels) == 2
def __call__(self, definition_ir) -> Dict[str, Dict[str, str]]:
    """Lower ``definition_ir`` to CUIR and generate computation and bindings sources.

    The GTIR obtained from ``DefIRToGTIR.apply`` is passed through
    ``prune_unused_parameters``, ``resolve_dtype`` and ``upcast`` (in that
    order) before being converted to OIR. The OIR is then handed to
    ``self._optimize_oir`` and lowered to CUIR, on which kernel fusion and the
    two extent-analysis passes are run.

    Args:
        definition_ir: The definition IR handed to ``DefIRToGTIR.apply``.

    Returns:
        A two-level mapping: ``"computation"`` holds the generated
        ``"computation.hpp"`` source and ``"bindings"`` holds the generated
        ``"bindings.cu"`` source.
    """
    # GTIR stage: each helper consumes the result of the previous one.
    gtir_node = DefIRToGTIR.apply(definition_ir)
    gtir_node = prune_unused_parameters(gtir_node)
    gtir_node = resolve_dtype(gtir_node)
    gtir_node = upcast(gtir_node)

    # OIR stage.
    oir_node = gtir_to_oir.GTIRToOIR().visit(gtir_node)
    oir_node = self._optimize_oir(oir_node)

    # CUIR stage: fuse kernels first, then compute and cache extents.
    cuir_node = oir_to_cuir.OIRToCUIR().visit(oir_node)
    cuir_node = kernel_fusion.FuseKernels().visit(cuir_node)
    cuir_node = extent_analysis.ComputeExtents().visit(cuir_node)
    cuir_node = extent_analysis.CacheExtents().visit(cuir_node)

    computation_source = cuir_codegen.CUIRCodegen.apply(cuir_node)
    bindings_source = GTCCudaBindingsCodegen.apply(
        cuir_node,
        module_name=self.module_name,
    )

    return {
        "computation": {"computation.hpp": computation_source},
        "bindings": {"bindings.cu": bindings_source},
    }
def __call__(self, definition_ir) -> Dict[str, Dict[str, str]]:
    """Lower ``definition_ir`` to CUIR and generate computation and bindings sources.

    The definition IR is converted to GTIR and run through the full
    ``GtirPipeline``; the resulting OIR is run through the full
    ``OirPipeline`` with ``NoFieldAccessPruning`` skipped. The OIR is then
    lowered to CUIR, on which kernel fusion and the two extent-analysis
    passes are applied in sequence.

    Args:
        definition_ir: The definition IR handed to ``DefIRToGTIR.apply``.

    Returns:
        A two-level mapping: ``"computation"`` holds the generated
        ``"computation.hpp"`` source and ``"bindings"`` holds the generated
        ``"bindings.cu"`` source.
    """
    gtir_node = GtirPipeline(DefIRToGTIR.apply(definition_ir)).full()

    unoptimized_oir = gtir_to_oir.GTIRToOIR().visit(gtir_node)
    oir_node = OirPipeline(unoptimized_oir).full(skip=[NoFieldAccessPruning])

    cuir_node = oir_to_cuir.OIRToCUIR().visit(oir_node)
    # Order matters: fusion runs before extents are computed and cached.
    cuir_passes = (
        kernel_fusion.FuseKernels,
        extent_analysis.ComputeExtents,
        extent_analysis.CacheExtents,
    )
    for cuir_pass in cuir_passes:
        cuir_node = cuir_pass().visit(cuir_node)

    computation_source = cuir_codegen.CUIRCodegen.apply(cuir_node)
    bindings_source = GTCCudaBindingsCodegen.apply(
        cuir_node,
        module_name=self.module_name,
        backend=self.backend,
    )

    return {
        "computation": {"computation.hpp": computation_source},
        "bindings": {"bindings.cu": bindings_source},
    }