def apply(cls, gtir, sdfg: dace.SDFG):
    """Return formatted C++ source for ``sdfg`` plus its rendered interface.

    The SDFG's generated code objects are searched for the one titled
    ``"Frame"``; its ``clean_code`` is post-processed, combined with the
    interface rendered from ``cls.template`` and a fixed gridtools preamble,
    and the whole text is run through ``codegen.format_source``.

    Args:
        cls: Class providing ``template`` and the ``generate_*`` helpers; it
            is instantiated with no arguments.
        gtir: IR object forwarded unchanged to ``generate_dace_args``.
        sdfg: The SDFG to generate code for; its ``name`` is passed to the
            interface template.

    Returns:
        The formatted C++ source as returned by ``codegen.format_source``.

    Raises:
        ValueError: If none of the generated code objects is titled "Frame".
    """
    self = cls()
    code_objects = sdfg.generate_code()

    # Look the frame up directly instead of via ``list.index`` so that a
    # missing entry reports what was actually generated.
    frame = next((co for co in code_objects if co.title == "Frame"), None)
    if frame is None:
        titles = [co.title for co in code_objects]
        raise ValueError(
            f"no 'Frame' code object among generated code objects: {titles}"
        )

    lines = frame.clean_code.split("\n")
    # remove import of not generated file (third line of the frame code)
    del lines[2:3]
    computations = codegen.format_source("cpp", "\n".join(lines), style="LLVM")

    interface = cls.template.definition.render(
        name=sdfg.name,
        dace_args=self.generate_dace_args(gtir, sdfg),
        functor_args=self.generate_functor_args(sdfg),
        tmp_allocs=self.generate_tmp_allocs(sdfg),
    )

    # Each preprocessor directive has to sit on its own line; the remaining
    # layout is normalised by the final formatting pass.
    generated_code = f"""#include <gridtools/sid/sid_shift_origin.hpp>
#include <gridtools/sid/allocator.hpp>
#include <gridtools/stencil/cartesian.hpp>
namespace gt = gridtools;
{computations}

{interface}
"""
    formatted_code = codegen.format_source("cpp", generated_code, style="LLVM")
    return formatted_code
def apply(cls, stencil_ir: gtir.Stencil, sdfg: dace.SDFG):
    """Return formatted C++ source for ``sdfg`` plus its rendered interface.

    Code is generated from the SDFG under temporarily overridden DaCe
    settings. A code object titled ``"CUDA"`` among the results selects the
    GPU variant, which changes the allocator passed to the interface template
    and adds the ``cuda_util.hpp`` include to the preamble.

    Args:
        cls: Class providing ``template``, ``_postprocess_dace_code`` and the
            ``generate_*`` helpers; it is instantiated with no arguments.
        stencil_ir: Stencil IR forwarded unchanged to ``generate_dace_args``.
        sdfg: The SDFG to generate code for; its ``name`` is passed to the
            interface template.

    Returns:
        The formatted C++ source as returned by ``codegen.format_source``.
    """
    instance = cls()

    # NOTE(review): the overrides are scoped to code generation only; confirm
    # that nothing after ``generate_code`` is meant to run under them.
    with dace.config.temporary_config():
        dace.config.Config.set("compiler", "cuda", "max_concurrent_streams", value=-1)
        dace.config.Config.set("compiler", "cpu", "openmp_sections", value=False)
        code_objects = sdfg.generate_code()

    generated_titles = {co.title for co in code_objects}
    is_gpu = "CUDA" in generated_titles

    # Everything that depends on the target is decided once, up front.
    if is_gpu:
        allocator = "gt::cuda_util::cuda_malloc"
        cuda_include = "#include <gridtools/common/cuda_util.hpp>"
    else:
        allocator = "std::make_unique"
        cuda_include = ""

    computations = cls._postprocess_dace_code(code_objects, is_gpu)

    interface = cls.template.definition.render(
        name=sdfg.name,
        dace_args=instance.generate_dace_args(stencil_ir, sdfg),
        functor_args=instance.generate_functor_args(sdfg),
        tmp_allocs=instance.generate_tmp_allocs(sdfg),
        allocator=allocator,
    )

    generated_code = f"""#include <gridtools/sid/sid_shift_origin.hpp>
#include <gridtools/sid/allocator.hpp>
#include <gridtools/stencil/cartesian.hpp>
{cuda_include}
namespace gt = gridtools;
{computations}

{interface}
"""
    return codegen.format_source("cpp", generated_code, style="LLVM")