def join(self, hdl):
    """Wait for the asynchronous operation identified by ``hdl`` to finish.

    The length of the handle selects the kind of operation:

    * length 2 -- a ``(thread, program)`` pair for a kernel execution.
      The stream is joined with ``cal_exec.join_stream`` and the memory
      pointer of every array in ``prgm._remote_bindings_data`` is re-set
      from element 1 of the matching ``prgm._bindings`` entry.
    * length 3 -- a copy handle, handed to ``cal_exec.join_copy`` together
      with ``self.ctx``.

    A handle of any other length is ignored.  Always returns ``None``.
    """
    # TODO - do something better to differentiate
    if len(hdl) == 2:
        # Join a kernel execution
        (th, prgm) = hdl
        cal_exec.join_stream(th)

        # The binding is looked up by key, so iterate items() rather than
        # values().
        # NOTE(review): the arrays come from _remote_bindings_data while the
        # binding is read from _bindings -- confirm both dicts share keys.
        for (key, arr) in prgm._remote_bindings_data.items():
            binding = prgm._bindings[key]
            nbytes = arr.data_len * arr.itemsize if isinstance(
                arr, extarray.extarray) else None
            if nbytes is not None:
                arr.set_memory(binding[1], nbytes)
            # Test the flag first so the numpy name is never evaluated when
            # numpy is unavailable.
            elif HAS_NUMPY and isinstance(arr, numpy.ndarray):
                cal_exec.set_ndarray_ptr(arr, binding[1])
    elif len(hdl) == 3:
        cal_exec.join_copy(self.ctx, hdl)
    return
class Processor(spe.Processor): exec_module = cal_exec def __init__(self, device): spe.Processor.__init__(self) if device < 0 or device > N_GPUS: raise Exception("Invalid device number %d" % device) self.device = device return def execute(self, code, domain=None, async=False): code.cache_code() if domain is None: try: input = code.get_remote_binding("i0") except KeyError: raise Exception( "No domain specified and no remote i0 register bound") domain = (0, 0, input.gpu_width, len(input) / input.gpu_width) if async: th = cal_exec.run_stream_async(code.render_code, self.device, domain, code._local_bindings, code._remote_bindings, code._copy_bindings) return (th, code) else: cal_exec.run_stream(code.render_code, self.device, domain, code._local_bindings, code._remote_bindings, code._copy_bindings) try: import numpy for (key, arr) in code._remote_bindings_data.items(): if isinstance(arr, extarray.extarray): arr.set_memory(arr.gpu_mem_handle[0], arr.data_len * arr.itemsize) elif isinstance(arr, numpy.ndarray): cal_exec.set_ndarray_ptr(arr, code._remote_bindings[key][0]) for (key, arr) in code._copy_bindings_data.items(): if isinstance(arr, extarray.extarray): arr.set_memory(arr.gpu_mem_handle[0], arr.data_len * arr.itemsize) elif isinstance(arr, numpy.ndarray): cal_exec.set_ndarray_ptr(arr, code._remote_bindings[key][0]) except ImportError: for arr in code._remote_bindings_data.values(): arr.set_memory(arr.gpu_mem_handle[0], arr.data_len * arr.itemsize) for arr in code._copy_bindings_data.values(): arr.set_memory(arr.gpu_mem_handle[0], arr.data_len * arr.itemsize) return
if async: th = cal_exec.run_stream_async(prgm.render_code, self.ctx, domain, prgm._bindings) return (th, prgm) else: cal_exec.run_stream(prgm.render_code, self.ctx, domain, prgm._bindings) # Go through the bindings and re-set all the pointers # When a kernel is executed, remote memory has to be unmapped and # remapped, meaning the memory location can change. for (key, arr) in prgm._bindings_data.items(): binding = prgm._bindings[key] if isinstance(arr, extarray.extarray): arr.set_memory(binding[1], arr.data_len * arr.itemsize) elif isinstance(arr, numpy.ndarray) and HAS_NUMPY: cal_exec.set_ndarray_ptr(arr, binding[1]) return def join(self, hdl): # TODO - do something better to differentiate if len(hdl) == 2: # Join a kernel execution (th, prgm) = hdl cal_exec.join_stream(th) for arr in prgm._remote_bindings_data.values(): binding = prgm._bindings[key] if isinstance(arr, extarray.extarray): arr.set_memory(bindings[1], arr.data_len * arr.itemsize) elif isinstance(arr, numpy.ndarray) and HAS_NUMPY: