def TestCompileExec():
    """Compile a small kernel, run it once, and print how long each step took.

    The kernel source is compiled with cal_exec.compile and executed with
    cal_exec.run_stream on device number 0 over a SIZE x SIZE domain.  "i0"
    and "o0" are bound to remote buffers allocated here; "o1" and "g[]" are
    passed as local bindings described by size/format tuples.

    The remote buffers and the compiled image are released even when an
    allocation or the run itself raises.
    """
    import time

    SIZE = 1024

    kernel = (
        "il_ps_2_0\n"
        "dcl_input_position_interp(linear_noperspective) v0\n"
        "dcl_output_generic o0\n"
        "dcl_output_generic o1\n"
        #"dcl_output_generic o2\n"
        "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
        #"mov r0, g[0]\n"
        "sample_resource(0)_sampler(0) o0, v0.xyxx\n"
        "mov g[0], r0\n"
        "end\n"
    )

    # Each print passes one pre-joined string so the output is the same under
    # the print statement and the print() function.
    t1 = time.time()
    image = cal_exec.compile(kernel)
    t2 = time.time()
    print("compile time " + str(t2 - t1))

    # Handles are recorded as soon as they exist so the finally clause frees
    # exactly what was allocated, in allocation order.
    buffers = []
    try:
        in_mem = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, SIZE, SIZE, 0)
        buffers.append(in_mem)
        out_mem = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, SIZE, SIZE, 0)
        buffers.append(out_mem)
        #glob = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, 4096, 4096, cal_exec.GLOBAL_BUFFER)

        print("input " + str(in_mem))
        print("output " + str(out_mem))

        remote = {"o0": out_mem, "i0": in_mem}
        local = {
            "o1": (SIZE, SIZE, cal_exec.FMT_FLOAT32_4),
            "g[]": (4096, 4096, cal_exec.FMT_FLOAT32_4),
        }
        domain = (0, 0, SIZE, SIZE)

        print("remote bindings " + str(remote))
        print("local bindings " + str(local))

        # image, dev num, (x, y, w, h)
        t1 = time.time()
        cal_exec.run_stream(image, 0, domain, local, remote)
        t2 = time.time()
        print("run time " + str(t2 - t1))
    finally:
        for mem in buffers:
            cal_exec.free_remote(mem)
        #cal_exec.free_remote(glob)
        cal_exec.free_image(image)
def TestCompileExec():
    """Compile a small kernel, run it once, and print how long each step took.

    The kernel source is compiled with cal_exec.compile and executed with
    cal_exec.run_stream on device number 0 over a SIZE x SIZE domain.  "i0"
    and "o0" are bound to remote buffers allocated here; "o1" and "g[]" are
    passed as local bindings described by size/format tuples.

    The remote buffers (released with cal_exec.free) and the compiled image
    are released even when an allocation or the run itself raises.
    """
    import time

    SIZE = 1024

    kernel = (
        "il_ps_2_0\n"
        "dcl_input_position_interp(linear_noperspective) v0\n"
        "dcl_output_generic o0\n"
        "dcl_output_generic o1\n"
        #"dcl_output_generic o2\n"
        "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
        #"mov r0, g[0]\n"
        "sample_resource(0)_sampler(0) o0, v0.xyxx\n"
        "mov g[0], r0\n"
        "end\n"
    )

    # Each print passes one pre-joined string so the output is the same under
    # the print statement and the print() function.
    started = time.time()
    image = cal_exec.compile(kernel)
    finished = time.time()
    print("compile time " + str(finished - started))

    # Track handles as they are created; the finally clause then releases
    # only what actually exists, in allocation order.
    allocated = []
    try:
        src_buf = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, SIZE, SIZE, 0)
        allocated.append(src_buf)
        dst_buf = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, SIZE, SIZE, 0)
        allocated.append(dst_buf)
        #glob = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, 4096, 4096, cal_exec.GLOBAL_BUFFER)

        print("input " + str(src_buf))
        print("output " + str(dst_buf))

        remote = {"o0": dst_buf, "i0": src_buf}
        local = {
            "o1": (SIZE, SIZE, cal_exec.FMT_FLOAT32_4),
            "g[]": (4096, 4096, cal_exec.FMT_FLOAT32_4),
        }
        domain = (0, 0, SIZE, SIZE)

        print("remote bindings " + str(remote))
        print("local bindings " + str(local))

        # image, dev num, (x, y, w, h)
        started = time.time()
        cal_exec.run_stream(image, 0, domain, local, remote)
        finished = time.time()
        print("run time " + str(finished - started))
    finally:
        for handle in allocated:
            cal_exec.free(handle)
        #cal_exec.free(glob)
        cal_exec.free_image(image)
def alloc_remote(self, typecode, comps, width, height=1, globl=False):
    """Allocate remote memory on this device and wrap it in an extarray.

    Arguments:
      typecode -- 'f', 'i' or 'I'; selects the FLOAT32, SIGNED_INT32 or
                  UNSIGNED_INT32 family of cal_exec formats.
      comps    -- components per element; must be 1, 2 or 4.
      width    -- forwarded to cal_exec.alloc_remote.
      height   -- forwarded to cal_exec.alloc_remote (default 1).
      globl    -- when true, cal_exec.GLOBAL_BUFFER is passed as the
                  allocation flag; otherwise the value is passed through.

    Returns the extarray, with gpu_mem_handle, gpu_device, gpu_width and
    gpu_pitch set on it.

    Raises Exception for an unsupported typecode or component count.
    """
    # Matching is done with == (not hashing) so the accepted values are
    # exactly those the comparison chain would accept.
    prefixes = (
        ('f', "FMT_FLOAT32_"),
        ('i', "FMT_SIGNED_INT32_"),
        ('I', "FMT_UNSIGNED_INT32_"),
    )
    prefix = next((name for code, name in prefixes if typecode == code), None)
    if prefix is None:
        raise Exception("Unsupported data type: " + str(typecode))

    suffix = next((str(count) for count in (1, 2, 4) if comps == count), None)
    if suffix is None:
        raise Exception("Number of components must be 1, 2, or 4")

    # Only the selected format constant is looked up on cal_exec.
    fmt = getattr(cal_exec, prefix + suffix)

    flags = cal_exec.GLOBAL_BUFFER if globl else globl

    # Allocate and initialize the memory
    # TODO - more operand error checking
    mem = cal_exec.alloc_remote(self.device, fmt, width, height, flags)

    arr = extarray.extarray(typecode, 0)
    # mem[0] is handed to set_memory and mem[1] is stored as the pitch.
    # NOTE(review): the * 4 presumably converts elements to bytes for the
    # 32-bit formats above -- confirm against extarray.set_memory.
    pitch = mem[1]
    arr.data_len = pitch * height * comps
    arr.set_memory(mem[0], arr.data_len * 4)

    arr.gpu_mem_handle = mem
    arr.gpu_device = self.device
    arr.gpu_width = width
    arr.gpu_pitch = pitch
    return arr
def alloc_remote(self, typecode, comps, width, height=1, globl=False):
    """Return an extarray backed by memory from cal_exec.alloc_remote.

    typecode picks the format family ('f' -> FLOAT32, 'i' -> SIGNED_INT32,
    'I' -> UNSIGNED_INT32) and comps (1, 2 or 4) picks the component count
    within it.  width and height are forwarded to the allocator.  A true
    globl is replaced by cal_exec.GLOBAL_BUFFER before being forwarded.

    The returned array carries gpu_mem_handle, gpu_device, gpu_width and
    gpu_pitch attributes.

    Raises Exception when typecode or comps is not one of the supported
    values; typecode is validated first.
    """
    families = (
        ('f', "FLOAT32"),
        ('i', "SIGNED_INT32"),
        ('I', "UNSIGNED_INT32"),
    )
    for code, family in families:
        if typecode == code:
            break
    else:
        raise Exception("Unsupported data type: " + str(typecode))

    for count in (1, 2, 4):
        if comps == count:
            break
    else:
        raise Exception("Number of components must be 1, 2, or 4")

    # Resolve just the one constant that was selected, e.g. FMT_FLOAT32_4.
    fmt = getattr(cal_exec, "FMT_%s_%d" % (family, count))

    if globl:
        buffer_flag = cal_exec.GLOBAL_BUFFER
    else:
        buffer_flag = globl

    # Allocate and initialize the memory
    # TODO - more operand error checking
    mem = cal_exec.alloc_remote(self.device, fmt, width, height, buffer_flag)

    result = extarray.extarray(typecode, 0)
    # mem[0] goes to set_memory; mem[1] is recorded as gpu_pitch.
    # NOTE(review): data_len * 4 looks like a byte count for 4-byte
    # components -- confirm against extarray.set_memory.
    result.data_len = mem[1] * height * comps
    result.set_memory(mem[0], result.data_len * 4)

    result.gpu_mem_handle = mem
    result.gpu_device = self.device
    result.gpu_width = width
    result.gpu_pitch = mem[1]
    return result
def alloc_remote(self, typecode, comps, width, height=1, globl=False):
    """Allocate an ExtArray backed by remote (main) memory.

    The format is obtained from self._get_fmt(typecode, comps).  width and
    height are forwarded to cal_exec.alloc_remote; a true globl is replaced
    by cal_exec.GLOBAL_BUFFER before being forwarded.

    Returns the extarray with gpu_mem_handle, gpu_device, gpu_width,
    gpu_pitch and gpu_height set on it.
    """
    fmt = self._get_fmt(typecode, comps)
    flags = cal_exec.GLOBAL_BUFFER if globl else globl

    # Allocate and initialize the memory
    # TODO - more operand error checking
    mem = cal_exec.alloc_remote(self.device, fmt, width, height, flags)

    arr = extarray.extarray(typecode, 0)
    # mem[1] is handed to set_memory and mem[2] is stored as the pitch.
    # NOTE(review): the * 4 presumably converts elements to bytes for
    # 32-bit components -- confirm against extarray.set_memory.
    pitch = mem[2]
    arr.data_len = pitch * height * comps
    arr.set_memory(mem[1], arr.data_len * 4)

    # GPU bookkeeping attributes, assigned in a fixed order.
    gpu_attrs = (
        ("gpu_mem_handle", mem),
        ("gpu_device", self.device),
        ("gpu_width", width),
        ("gpu_pitch", pitch),
        ("gpu_height", height),
    )
    for attr_name, attr_value in gpu_attrs:
        setattr(arr, attr_name, attr_value)
    return arr
def TestRemoteAlloc():
    """Allocate a 1024 x 1024 FMT_FLOAT32_4 remote buffer, print its handle,
    and release it with cal_exec.free_remote."""
    side = 1024
    handle = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, side, side, 0)
    # One pre-joined string prints identically as a statement or a call.
    print("mem handle " + str(handle))
    cal_exec.free_remote(handle)
def TestRemoteAlloc():
    """Allocate a 1024 x 1024 FMT_FLOAT32_4 remote buffer, print its handle,
    and release it with cal_exec.free."""
    dims = (1024, 1024)
    remote_mem = cal_exec.alloc_remote(cal_exec.FMT_FLOAT32_4, dims[0], dims[1], 0)
    # One pre-joined string prints identically as a statement or a call.
    print("mem handle " + str(remote_mem))
    cal_exec.free(remote_mem)