def __init__(self, context, rho, rho_u, rho_v, E, nx, ny, dx, dy, g, gamma,
                 theta=1.3,
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(),
                 block_width=16,
                 block_height=8):
        """
        Set up the simulator: initialise the base class, fetch the CUDA
        kernel, upload the initial state and seed the CFL buffer.

        context              -- object whose get_module() supplies the kernel
        rho, rho_u, rho_v, E -- initial fields uploaded to the device as u0
                                (presumably density, x/y momentum and energy;
                                confirm against the kernel source)
        nx, ny, dx, dy       -- forwarded to the base constructor; nx and ny
                                also size the device arrays
        g, gamma, theta      -- kept on the instance as np.float32 values
        cfl_scale, boundary_conditions, block_width, block_height
                             -- forwarded to the base constructor
        """
        # Base class first: the code below reads self.block_size, self.stream,
        # self.grid_size and self.dx / self.dy, which are not assigned here
        # (presumably the base constructor provides them).
        super().__init__(context, nx, ny, dx, dy, boundary_conditions,
                         cfl_scale, 2, block_width, block_height)

        # Scalar parameters are stored in single precision
        self.theta = np.float32(theta)
        self.gamma = np.float32(gamma)
        self.g = np.float32(g)

        # Fetch the kernel. Key order inside these dicts is kept as-is since
        # get_module receives them verbatim.
        kernel_defines = {
            'BLOCK_WIDTH': self.block_size[0],
            'BLOCK_HEIGHT': self.block_size[1]
        }
        nvcc_args = {
            'no_extern_c': True,
            'options': ["--use_fast_math"],
        }
        cuda_module = context.get_module("cuda/EE2D_KP07_dimsplit.cu",
                                         defines=kernel_defines,
                                         compile_args=nvcc_args,
                                         jit_compile_args={})
        self.kernel = cuda_module.get_function("KP07DimsplitKernel")
        self.kernel.prepare("iiffffffiiPiPiPiPiPiPiPiPiP")

        # Device data: u0 is filled from the initial fields, u1 gets four
        # empty slots
        initial_fields = [rho, rho_u, rho_v, E]
        self.u0 = Common.ArakawaA2D(self.stream, nx, ny, 2, 2, initial_fields)
        self.u1 = Common.ArakawaA2D(self.stream, nx, ny, 2, 2,
                                    [None] * len(initial_fields))
        self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)

        # Initial time step: smallest value over the grid of
        # spacing / (|momentum / rho| + sqrt(gamma * rho)), per direction
        def smallest_step(spacing, momentum):
            return np.min(spacing /
                          (np.abs(momentum / rho) + np.sqrt(gamma * rho)))

        self.dt = min(smallest_step(self.dx, rho_u),
                      smallest_step(self.dy, rho_v))
        self.cfl_data.fill(self.dt, stream=self.stream)
Beispiel #2
0
 def setup_mpi(self, line):
     """
     Create a Common.IPEngine and store it in the user namespace.

     The argument string ``line`` is parsed with magic_arguments; the
     parsed ``name`` is the key in ``self.shell.user_ns`` and
     ``num_engines`` is passed to Common.IPEngine. If the name is already
     present, the old object is shut down and released before the new one
     is created.
     """
     args = magic_arguments.parse_argstring(self.setup_mpi, line)
     logger = logging.getLogger('GPUSimulators')
     user_ns = self.shell.user_ns

     if args.name in user_ns:
         logger.warning("MPI already set up, resetting")
         # The entry can be None if an earlier reset got this far but the
         # following IPEngine construction raised; there is nothing to
         # shut down in that case.
         if user_ns[args.name] is not None:
             user_ns[args.name].shutdown()
         # Drop the namespace reference before collecting so the old
         # engine can be reclaimed ahead of creating its replacement.
         user_ns[args.name] = None
         gc.collect()

     user_ns[args.name] = Common.IPEngine(args.num_engines)
Beispiel #3
0
    def __init__(self, context, h0, hu0, hv0, nx, ny, dx, dy, g,
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(),
                 block_width=16,
                 block_height=16):
        """
        Set up the simulator: initialise the base class, fetch the CUDA
        kernel, upload the initial state and seed the CFL buffer.

        context        -- object whose get_module() supplies the kernel
        h0, hu0, hv0   -- initial fields uploaded to the device as u0
                          (presumably water depth and x/y momentum; confirm
                          against the kernel source)
        nx, ny, dx, dy -- forwarded to the base constructor; nx and ny also
                          size the device arrays
        g              -- kept on the instance as an np.float32 value
        cfl_scale, boundary_conditions, block_width, block_height
                       -- forwarded to the base constructor
        """
        # Base class first: the code below reads self.block_size, self.stream,
        # self.grid_size and self.dx / self.dy, which are not assigned here
        # (presumably the base constructor provides them).
        super().__init__(context, nx, ny, dx, dy, boundary_conditions,
                         cfl_scale, 2, block_width, block_height)
        self.g = np.float32(g)

        # Fetch the kernel. Key order inside these dicts is kept as-is since
        # get_module receives them verbatim.
        kernel_defines = {
            'BLOCK_WIDTH': self.block_size[0],
            'BLOCK_HEIGHT': self.block_size[1]
        }
        nvcc_args = {
            'no_extern_c': True,
            'options': ["--use_fast_math"],
        }
        cuda_module = context.get_module("cuda/SWE2D_WAF.cu",
                                         defines=kernel_defines,
                                         compile_args=nvcc_args,
                                         jit_compile_args={})
        self.kernel = cuda_module.get_function("WAFKernel")
        self.kernel.prepare("iiffffiiPiPiPiPiPiPiP")

        # Device data: u0 is filled from the initial fields, u1 gets three
        # empty slots
        initial_fields = [h0, hu0, hv0]
        self.u0 = Common.ArakawaA2D(self.stream, nx, ny, 2, 2, initial_fields)
        self.u1 = Common.ArakawaA2D(self.stream, nx, ny, 2, 2,
                                    [None] * len(initial_fields))
        self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)

        # Initial time step: smallest value over the grid of
        # spacing / (|momentum / h0| + sqrt(g * h0)), per direction
        def smallest_step(spacing, momentum):
            return np.min(spacing /
                          (np.abs(momentum / h0) + np.sqrt(g * h0)))

        dt = min(smallest_step(self.dx, hu0), smallest_step(self.dy, hv0))
        self.cfl_data.fill(dt, stream=self.stream)
Beispiel #4
0
    def benchmark_single_simulator(simulator, arguments, block_widths, block_heights):
        """
        Benchmark ``simulator`` for every block width/height combination.

        Returns an array of shape (len(block_heights), len(block_widths))
        whose entry [j, i] is the value returned by Autotuner.run_benchmark
        for block_heights[j] and block_widths[i]. Entries start out as NaN.
        ``arguments`` itself is not modified; a copy receives the
        'block_width' and 'block_height' entries.
        """
        logger = logging.getLogger(__name__)

        num_heights = len(block_heights)
        num_widths = len(block_widths)
        megacells = np.full((num_heights, num_widths), np.nan)

        logger.debug("Running %d benchmarks with %s", num_heights*num_widths, simulator.__name__)

        # Work on a copy so the caller's arguments stay untouched
        sim_arguments = arguments.copy()

        with Common.Timer(simulator.__name__) as t:
            for row, height in enumerate(block_heights):
                sim_arguments['block_height'] = height
                for col, width in enumerate(block_widths):
                    sim_arguments['block_width'] = width
                    megacells[row, col] = Autotuner.run_benchmark(simulator, sim_arguments)

        logger.debug("Completed %s in %f seconds", simulator.__name__, t.secs)

        return megacells
    def get_module(self, kernel_filename,
                    include_dirs=None,
                    defines=None,
                    compile_args=None,
                    jit_compile_args=None):
        """
        Return the CUDA module for ``kernel_filename``, compiling it only
        when no cached copy is available.

        Lookup order: the in-memory ``self.modules`` dict, then (when
        ``self.use_cache`` is set) a binary stored under ``self.cache_path``,
        and finally a fresh compile through ``cuda_compiler.compile``. The
        cache key combines the kernel file name, a hash of the kernel source
        (from ``CudaContext.hash_kernel``) and an MD5 of ``defines`` and
        ``compile_args``.

        Parameters:
            kernel_filename:  kernel source path relative to
                              ``self.module_path``
            include_dirs:     extra include directories (default: none)
            defines:          mapping written as ``#define key value`` lines
                              ahead of the ``#include`` of the kernel source
                              (default: none)
            compile_args:     keyword arguments for ``cuda_compiler.compile``
                              (default: ``{'no_extern_c': True}``)
            jit_compile_args: keyword arguments for
                              ``cuda.module_from_buffer`` (default: none)

        Returns:
            The loaded module; it is also stored in ``self.modules``.
        """
        # Defaults are created per call instead of being shared mutable
        # objects. compile_args has to be a mapping (it is unpacked with **
        # below), so its default is a dict.
        if include_dirs is None:
            include_dirs = []
        if defines is None:
            defines = {}
        if compile_args is None:
            compile_args = {'no_extern_c': True}
        if jit_compile_args is None:
            jit_compile_args = {}

        def cuda_compile_message_handler(compile_success_bool, info_str,
                                         error_str):
            """
            Helper function to print compilation output
            """
            self.logger.debug("Compilation returned %s",
                              str(compile_success_bool))
            if info_str:
                self.logger.debug("Info: %s", info_str)
            if error_str:
                self.logger.debug("Error: %s", error_str)

        kernel_filename = os.path.normpath(kernel_filename)
        kernel_path = os.path.abspath(
            os.path.join(self.module_path, kernel_filename))

        # Create a hash of the kernel options
        options_hasher = hashlib.md5()
        options_hasher.update(
            str(defines).encode('utf-8') + str(compile_args).encode('utf-8'))
        options_hash = options_hasher.hexdigest()

        # Create hash of kernel source
        source_hash = CudaContext.hash_kernel(
            kernel_path,
            include_dirs=[self.module_path] + include_dirs)

        # Create final hash: <name>_<source hash>_<options hash><extension>
        root, ext = os.path.splitext(kernel_filename)
        kernel_hash = root + "_" + source_hash + "_" + options_hash + ext
        cached_kernel_filename = os.path.join(self.cache_path, kernel_hash)

        # If we have the kernel in our hashmap, return it
        if kernel_hash in self.modules:
            self.logger.debug("Found kernel %s cached in hashmap (%s)",
                              kernel_filename, kernel_hash)
            return self.modules[kernel_hash]

        # If we have it on disk, load and return it
        if self.use_cache and os.path.isfile(cached_kernel_filename):
            self.logger.debug("Found kernel %s cached on disk (%s)",
                              kernel_filename, kernel_hash)

            with io.open(cached_kernel_filename, "rb") as file:
                file_str = file.read()
                module = cuda.module_from_buffer(
                    file_str,
                    message_handler=cuda_compile_message_handler,
                    **jit_compile_args)

            self.modules[kernel_hash] = module
            return module

        # Otherwise, compile it from source
        self.logger.debug("Compiling %s (%s)", kernel_filename, kernel_hash)

        # Create kernel string: one #define per entry, then the #include
        define_lines = [
            "#define {:s} {:s}\n".format(str(key), str(value))
            for key, value in defines.items()
        ]
        kernel_string = "".join(define_lines) + '#include "{:s}"'.format(
            os.path.join(self.module_path, kernel_filename))

        if self.use_cache:
            # kernel_filename may contain sub-directories, so create the
            # whole path; exist_ok avoids failing when it already exists.
            cached_kernel_dir = os.path.dirname(cached_kernel_filename)
            os.makedirs(cached_kernel_dir, exist_ok=True)
            with io.open(cached_kernel_filename + ".txt", "w") as file:
                file.write(kernel_string)

        with Common.Timer("compiler") as timer:
            import warnings
            with warnings.catch_warnings():
                # Silence the warning raised when the compiler succeeds but
                # still produced output
                warnings.filterwarnings(
                    "ignore",
                    message=
                    "The CUDA compiler succeeded, but said the following:\nkernel.cu",
                    category=UserWarning)
                cubin = cuda_compiler.compile(kernel_string,
                                              include_dirs=include_dirs,
                                              cache_dir=False,
                                              **compile_args)
            module = cuda.module_from_buffer(
                cubin,
                message_handler=cuda_compile_message_handler,
                **jit_compile_args)
            if self.use_cache:
                with io.open(cached_kernel_filename, "wb") as file:
                    file.write(cubin)

        self.modules[kernel_hash] = module
        return module
Beispiel #6
0
# Add the grid to the simulator arguments.
# NOTE(review): presumably this is what ends up as the `grid` parameter of
# genSim below -- confirm against Common.runSimulation.
arguments['grid'] = grid

####
# Run simulation
####
logger.info("Running simulation")


#Helper function to create MPI simulator
def genSim(grid, **kwargs):
    """
    Build an EE2D_KP07_dimsplit simulator from ``kwargs`` and return it
    wrapped in an MPISimulator together with ``grid``.
    """
    return MPISimulator.MPISimulator(
        EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs), grid)


# Hand genSim and the collected arguments to Common.runSimulation; the value
# it returns replaces `outfile`.
outfile = Common.runSimulation(genSim, arguments, outfile, save_times,
                               save_var_names)

####
# Clean shutdown
####
# Drop the module-level references first so that gc.collect() below can
# reclaim the objects they pointed to. The order of these statements is
# deliberate and should be kept.
# NOTE(review): in the visible part of the script `sim` and `local_sim` are
# only assigned inside genSim, so these two lines may simply create new
# globals -- confirm against the part of the script above.
sim = None
local_sim = None
cuda_context = None
arguments = None
logging.shutdown()
gc.collect()

####
# Print completion and exit
####
print("Completed!")