import ctypes
import operator
from itertools import izip  # the original is Python 2 code

import blaze
from dynd import ndt
# Assumed context from the surrounding blaze source (imports not shown in
# this file): blz, BLZDataDescriptor, DyNDDataDescriptor, to_numpy,
# _convert, _mk_array_c_ref, get_dynd_type, copy_function, run_pipeline,
# run_time_passes, CKernelInterp, CKernelChunkInterp, visit.


def execute_datadescriptor_ooc(dd, res_name=None):
    # Only lift the fused kernel by one dimension; the outermost
    # dimension is streamed element by element below.
    res_ds = dd.dshape
    res_shape, res_dt = to_numpy(res_ds)
    lifted = dd.kerneltree._fused.kernel.lift(1, 'C')
    cf = lifted.ctypes_func

    # Element readers for the operands, paired with the ctypes element
    # type and inner shape the lifted kernel expects for each argument.
    args = [(ct._type_,
             arr.arr._data.element_reader(1),
             arr.arr.dshape.shape[1:])
            for ct, arr in izip(cf.argtypes[:-1], dd.args)]

    # Empty, appendable BLZ container for the result.
    res_dd = BLZDataDescriptor(blz.zeros((0,) + res_shape[1:],
                                         dtype=res_dt,
                                         rootdir=res_name))

    # Staging buffer for one output element.
    # NOTE: hardcoded as three doubles in the original, so this version
    # only handles results whose inner element is a (3,) float64 row.
    res_ct = ctypes.c_double * 3
    res_buffer = res_ct()
    res_buffer_entry = (cf.argtypes[-1]._type_,
                        ctypes.pointer(res_buffer),
                        res_shape[1:])

    with res_dd.element_appender() as ea:
        for i in xrange(res_shape[0]):
            # Read one outer element from each operand, then tack the
            # output staging buffer on as the kernel's last argument.
            args_i = [(t, er.read_single((i,)), sh)
                      for t, er, sh in args]
            args_i.append(res_buffer_entry)
            cf_args = [_convert(*entry) for entry in args_i]
            cf(*[ctypes.byref(x) for x in cf_args])
            # Copy the staged element into the BLZ container.
            ea.append(ctypes.addressof(res_buffer), 1)

    return blaze.Array(res_dd)
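# Design note on the variant above vs. execute_datadescriptor_ooc_2 below:
# this first version evaluates every output element into a local ctypes
# staging buffer (res_buffer) and then copies it into the BLZ container
# through element_appender().append(), so each element crosses memory
# twice. The hardcoded c_double * 3 buffer also ties it to one specific
# result type. The second version addresses both points.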
def execute_datadescriptor_ooc_2(dd, res_name=None):
    res_ds = dd.dshape
    res_shape, res_dt = to_numpy(res_ds)
    lifted = dd.kerneltree._fused.kernel.lift(1, 'C')
    cf = lifted.ctypes_func
    # Derive the result's ctypes element type from the lifted kernel's
    # signature instead of hardcoding it.
    res_ctype = cf.argtypes[-1]._type_

    args = [(ct._type_,
             arr.arr._data.element_reader(1),
             arr.arr.dshape.shape[1:])
            for ct, arr in izip(cf.argtypes[:-1], dd.args)]

    res_dd = BLZDataDescriptor(blz.zeros((0,) + res_shape[1:],
                                         dtype=res_dt,
                                         rootdir=res_name))

    with res_dd.element_appender() as dst:
        for i in xrange(res_shape[0]):
            # advance sources
            tpl = (i,)
            cf_args = [_mk_array_c_ref(t, er.read_single(tpl), sh)
                       for t, er, sh in args]
            # Write straight into the destination's buffered pointer;
            # no intermediate staging copy is needed.
            with dst.buffered_ptr() as dst_ptr:
                cf_args.append(_mk_array_c_ref(res_ctype, dst_ptr,
                                               res_shape[1:]))
                cf(*cf_args)

    return blaze.Array(res_dd)
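# Hypothetical usage sketch (illustrative only; `expr` and the '/tmp/out'
# path are assumptions, not part of this module). Given a deferred blaze
# expression whose data descriptor carries a fused kerneltree:
#
#     expr = a + b * 2          # deferred blaze expression
#     out = execute_datadescriptor_ooc_2(expr._data, res_name='/tmp/out')
#
# `out` is then a blaze.Array backed by the persistent BLZ store rooted at
# res_name; only one outer element at a time is materialized in memory.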
def interpret(func, env, args, storage=None, **kwds):
    assert len(args) == len(func.args)

    # Make a copy, since we're going to mutate our IR!
    func = copy_function(func)

    # If it's a BLZ output, we want an interpreter that streams
    # the processing through in chunks
    if storage is not None:
        if len(func.type.restype.shape) == 0:
            raise TypeError('Require an array, not a scalar, '
                            'for outputting to BLZ')
        env['stream-outer'] = True
        result_ndim = env['result-ndim'] = len(func.type.restype.shape)
    else:
        # Convert any persistent inputs to memory
        # TODO: should stream the computation in this case
        for i, arg in enumerate(args):
            if isinstance(arg._data, BLZDataDescriptor):
                args[i] = arg[:]

    # Update environment with dynd type information
    dynd_types = dict((arg, get_dynd_type(array))
                      for arg, array in zip(func.args, args)
                      if isinstance(array._data, DyNDDataDescriptor))
    env['dynd-types'] = dynd_types

    # Lift ckernels
    func, env = run_pipeline(func, env, run_time_passes)

    if storage is None:
        # Evaluate once
        values = dict(zip(func.args, args))
        interp = CKernelInterp(values)
        visit(interp, func)
        return interp.result
    else:
        res_shape, res_dt = blaze.datashape.to_numpy(func.type.restype)
        dim_size = operator.index(res_shape[0])
        row_size = ndt.type(str(func.type.restype.subarray(1))).data_size
        chunk_size = min(max(1, (1024*1024) // row_size), dim_size)

        # Evaluate by streaming the outermost dimension,
        # and using the BLZ data descriptor's append
        dst_dd = BLZDataDescriptor(blz.zeros((0,) + res_shape[1:], res_dt,
                                             rootdir=storage.path))

        # Loop through all the chunks
        for chunk_start in range(0, dim_size, chunk_size):
            # Tell the interpreter which chunk size to use (last
            # chunk might be smaller)
            chunk_size = min(chunk_size, dim_size - chunk_start)

            # Evaluate the chunk
            args_chunk = [arg[chunk_start:chunk_start + chunk_size]
                          if len(arg.dshape.shape) == result_ndim
                          else arg
                          for arg in args]
            values = dict(zip(func.args, args_chunk))
            interp = CKernelChunkInterp(values, chunk_size, result_ndim)
            visit(interp, func)
            chunk = interp.result._data.dynd_arr()
            dst_dd.append(chunk)

        return blaze.Array(dst_dd)
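# Worked example of the chunk sizing above (the concrete values are
# illustrative assumptions, not taken from this module): for a result of
# type `100000, 10, float64`, one row is
# ndt.type('10, float64').data_size == 80 bytes, so
# chunk_size = min(max(1, (1024*1024) // 80), 100000) == 13107 rows,
# i.e. each streamed chunk stays close to 1 MiB regardless of row width,
# and the final iteration shrinks chunk_size to cover the remainder.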