def typeof_pyval(cls, val):
    """
    Resolve the Numba type of *val* when the native code cannot decide.

    This is called from numba._dispatcher as a fallback. NumPy arrays map
    to an array type (or to their bare element type when 0-dimensional),
    Python numbers map to fixed-width defaults, NumPy array scalars map
    through their dtype, and anything else is typed as ``types.pyobject``.
    """
    if isinstance(val, numpy.ndarray):
        # TODO complete dtype mapping
        elem_type = numpy_support.from_dtype(val.dtype)
        if val.ndim == 0:
            # A 0-d array is treated as an array scalar.
            return elem_type
        return types.Array(elem_type, val.ndim, numpy_support.map_layout(val))

    # The following are handled in the C version for exact type match,
    # so they are tested only after the array case.
    if isinstance(val, INT_TYPES):
        return types.int64
    if isinstance(val, float):
        return types.float64
    if isinstance(val, complex):
        return types.complex128
    if numpy_support.is_arrayscalar(val):
        # Array scalar
        return numpy_support.from_dtype(numpy.dtype(type(val)))

    # Other object
    return types.pyobject
def test_two_distinct_arrays(self):
    """
    Test with two arrays of distinct record types.

    The compiled function must agree with the pure-Python function for
    every index of the reference sample.
    """
    arg_types = (
        numpy_support.from_dtype(recordtype)[:],
        numpy_support.from_dtype(recordtype2)[:],
        types.intp,
    )
    cfunc = self.get_cfunc(get_two_arrays_distinct, arg_types)

    for idx in range(self.refsample1d.size):
        expected = get_two_arrays_distinct(
            self.refsample1d, self.refsample1d2, idx)
        got = cfunc(self.nbsample1d, self.nbsample1d2, idx)
        self.assertEqual(expected, got)
def test_two_distinct_records(self):
    """
    Testing the use of two scalar records of differing type.

    The expected value is field ``'a'`` of the first record plus field
    ``'f'`` of the second one.
    """
    first = self.nbsample1d.copy()[0]
    second = self.refsample1d2.copy()[0]
    expected = first['a'] + second['f']

    signature = (
        numpy_support.from_dtype(recordtype),
        numpy_support.from_dtype(recordtype2),
    )
    cfunc = self.get_cfunc(get_two_records_distinct, signature)

    self.assertEqual(expected, cfunc(first, second))
def test_gufunc(self):
    """
    Check a CUDA-targeted gufunc that subtracts day-resolution datetimes
    against the equivalent NumPy subtraction.
    """
    datetime_t = from_dtype(np.dtype('datetime64[D]'))
    timedelta_t = from_dtype(np.dtype('timedelta64[D]'))
    signature = (datetime_t, datetime_t, timedelta_t[:])

    @guvectorize([signature], '(),()->()', target='cuda')
    def timediff(start, end, out):
        out[0] = end - start

    starts = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
    # Random day offsets are added to produce the end dates.
    ends = starts + np.random.randint(0, 10000, starts.size)

    self.assertPreciseEqual(timediff(starts, ends), ends - starts)
def test_record_args(self):
    """
    Testing scalar record value as argument.

    For each of the fields 'a', 'b' and 'c' the matching module-level
    ``get_record_<field>`` function is compiled and called with a record
    and a value; the reference count of the record is checked afterwards.
    """
    recval = self.sample1d.copy()[0]
    cases = zip(
        'abc',
        (types.float64, types.int32, types.complex64),
        (1.23, 123432, 132j),
    )
    # Baseline taken before any compiled call touches the record.
    old_refcnt = sys.getrefcount(recval)

    for attr, valtyp, val in cases:
        expected = getattr(recval, attr)

        pyfunc = globals()['get_record_' + attr]
        nbrecord = numpy_support.from_dtype(recordtype)
        cfunc = compile_isolated(pyfunc, [nbrecord, valtyp]).entry_point

        got = cfunc(recval, val)
        self.assertEqual(expected, got)
        self.assertNotEqual(recval.a, got)
        # Drop the results before measuring so they cannot skew the count.
        del got, expected
        # Check for potential leaks (issue #441)
        self.assertEqual(sys.getrefcount(recval), old_refcnt)
def transpose(a, b=None):
    """Compute the transpose of 'a' and store it into 'b', if given,
    and return it. If 'b' is not given, allocate a new array
    and return that.

    This implements the algorithm documented in
    http://devblogs.nvidia.com/parallelforall/efficient-matrix-transpose-cuda-cc/

    :param a: an `np.ndarray` or a `DeviceNDArrayBase` subclass. If already on
        the device its stream will be used to perform the transpose (and to copy
        `b` to the device if necessary).
    :param b: optional output array. When it is ``None`` a new device array
        of shape ``(a.shape[1], a.shape[0])`` is allocated.
    :return: `b`, or the newly allocated array when `b` was not given.
    """

    # prefer `a`'s stream if it has one, otherwise use the default stream (0)
    stream = getattr(a, 'stream', 0)

    # Test for absence explicitly: the truth value of an array-like `b` is
    # either ambiguous (raises for multi-element ndarrays) or depends on its
    # contents, neither of which says whether the caller supplied an output.
    if b is None:
        cols, rows = a.shape
        strides = a.dtype.itemsize * cols, a.dtype.itemsize
        b = cuda.cudadrv.devicearray.DeviceNDArray(
            (rows, cols),
            strides,
            dtype=a.dtype,
            stream=stream)

    dt = nps.from_dtype(a.dtype)

    tpb = driver.get_device().MAX_THREADS_PER_BLOCK
    # we need to factor available threads into x and y axis
    tile_width = int(math.pow(2, math.log(tpb, 2) / 2))
    tile_height = int(tpb / tile_width)

    # The tile is one column wider than the thread block.
    tile_shape = (tile_height, tile_width + 1)

    @cuda.jit
    def kernel(input, output):

        tile = cuda.shared.array(shape=tile_shape, dtype=dt)

        tx = cuda.threadIdx.x
        ty = cuda.threadIdx.y
        bx = cuda.blockIdx.x * cuda.blockDim.x
        by = cuda.blockIdx.y * cuda.blockDim.y
        x = by + tx
        y = bx + ty

        if by + ty < input.shape[0] and bx + tx < input.shape[1]:
            tile[ty, tx] = input[by + ty, bx + tx]
        cuda.syncthreads()
        if y < output.shape[0] and x < output.shape[1]:
            output[y, x] = tile[tx, ty]

    # one block per tile, plus one for remainders
    blocks = int(b.shape[0] / tile_height + 1), int(b.shape[1] / tile_width + 1)
    # one thread per tile element
    threads = tile_height, tile_width
    kernel[blocks, threads, stream](a, b)

    return b
def test_usecase1(self):
    """
    Run ``usecase1`` both as pure Python and compiled on identical
    record arrays and check that both leave the arrays in the same state.
    """
    # This is an unaligned dtype
    mystruct_dt = np.dtype([('p', np.float64),
                            ('row', np.float64),
                            ('col', np.float64)])
    mystruct = numpy_support.from_dtype(mystruct_dt)

    cfunc = compile_isolated(usecase1, (mystruct[:], mystruct[:])).entry_point

    st1 = np.recarray(3, dtype=mystruct_dt)
    st2 = np.recarray(3, dtype=mystruct_dt)
    # Every field of both arrays is filled with 1..size.
    for st in (st1, st2):
        for field in ('p', 'row', 'col'):
            setattr(st, field, np.arange(st.size) + 1)

    expect1, expect2 = st1.copy(), st2.copy()
    got1, got2 = expect1.copy(), expect2.copy()

    usecase1(expect1, expect2)
    cfunc(got1, got2)

    self.assertTrue(np.all(expect1 == got1))
    self.assertTrue(np.all(expect2 == got2))
def _infer_series_dtype(S):
    """
    Infer the Numba-level element type for the pandas Series *S*.

    Object columns are classified from their first value that is neither
    ``np.nan`` nor ``None`` (list or str); categorical columns map to
    ``PDCategoricalDtype``; everything else goes through
    ``numpy_support.from_dtype``.

    Raises ValueError when an object column holds only missing values,
    when its first real value is of an unsupported kind, or when the
    NumPy dtype is not supported.
    """
    if S.dtype == np.dtype('O'):
        # XXX assuming the whole column is strings if 1st val is string
        # TODO: handle NA as 1st value
        n_items = len(S)
        pos = 0
        while pos < n_items and (S.iloc[pos] is np.nan or S.iloc[pos] is None):
            pos += 1
        if pos == n_items:
            raise ValueError(
                "object dtype infer out of bounds for {}".format(S.name))

        probe = S.iloc[pos]
        if isinstance(probe, list):
            return _infer_series_list_dtype(S)
        if isinstance(probe, str):
            return string_type
        raise ValueError(
            "object dtype infer: data type for column {} not supported".format(S.name))

    if isinstance(S.dtype, pandas.api.types.CategoricalDtype):
        return PDCategoricalDtype(S.dtype.categories)

    # regular numpy types
    try:
        return numpy_support.from_dtype(S.dtype)
    except NotImplementedError:
        raise ValueError("np dtype infer: data type for column {} not supported".format(S.name))
def _numba_type_(self):
    """
    Magic attribute expected by Numba to get the numba type that
    represents this object.

    The result is an array type built from this object's dtype and
    number of dimensions, always with the 'A' layout.
    """
    element_type = numpy_support.from_dtype(self.dtype)
    return types.Array(element_type, self.ndim, 'A')
def map_struct_to_record_dtype(cffi_type):
    """Convert a cffi type into a NumPy Record dtype

    Each field of *cffi_type* is mapped to a NumPy dtype at its declared
    offset. Fields that carry a bitshift, flags or a bitsize are rejected
    with a ValueError. The resulting dtype is requested as aligned only
    when every field offset is a multiple of its dtype's alignment.
    """
    names = []
    formats = []
    offsets = []
    is_aligned = True

    for field_name, field in cffi_type.fields:
        # guard unsupported values
        if field.bitshift != -1:
            msg = "field {!r} has bitshift, this is not supported"
            raise ValueError(msg.format(field_name))
        if field.flags != 0:
            msg = "field {!r} has flags, this is not supported"
            raise ValueError(msg.format(field_name))
        if field.bitsize != -1:
            msg = "field {!r} has bitsize, this is not supported"
            raise ValueError(msg.format(field_name))

        field_dtype = numpy_support.as_dtype(
            map_type(field.type, use_record_dtype=True),
        )
        names.append(field_name)
        formats.append(field_dtype)
        offsets.append(field.offset)
        # Check alignment
        is_aligned &= (field.offset % field_dtype.alignment == 0)

    spec = {
        'names': names,
        'formats': formats,
        'offsets': offsets,
        'itemsize': ffi.sizeof(cffi_type),
    }
    return numpy_support.from_dtype(np.dtype(spec, align=is_aligned))
def __call__(self, *args, **kwargs):
    """
    Compile (via ``self._stencil_wrapper``) and run the stencil on *args*.

    An optional ``out`` keyword supplies a result array; it is typed from
    its dtype, dimensionality and layout and passed as the last argument
    of the compiled entry point.

    Raises ValueError when a neighborhood is set and its length differs
    from the dimensionality of the first argument.
    """
    neighborhood = self.neighborhood
    if neighborhood is not None and len(neighborhood) != args[0].ndim:
        raise ValueError("{} dimensional neighborhood specified for {} "
                         "dimensional input array".format(
                            len(self.neighborhood), args[0].ndim))

    result = kwargs['out'] if 'out' in kwargs else None
    if result is not None or 'out' in kwargs:
        result_type = types.npytypes.Array(
            numpy_support.from_dtype(result.dtype),
            result.ndim,
            numpy_support.map_layout(result))
        array_types = tuple([typing.typeof.typeof(x) for x in args])
        array_types_full = array_types + (result_type,)
    else:
        array_types = tuple([typing.typeof.typeof(x) for x in args])
        array_types_full = array_types

    if config.DEBUG_ARRAY_OPT >= 1:
        print("__call__", array_types, args, kwargs)

    real_ret, typemap, calltypes = self.get_return_type(array_types)
    new_func = self._stencil_wrapper(result, None, real_ret, typemap,
                                     calltypes, *array_types_full)

    if result is None:
        return new_func.entry_point(*args)
    return new_func.entry_point(*(args + (result,)))
def __call__(self, *args, **kwargs):
    """
    Compile (via ``self._stencil_wrapper``) and run the stencil on *args*.

    An optional ``out`` keyword supplies a result array; it is typed from
    its dtype, dimensionality and layout and passed as the last argument
    of the compiled entry point.

    Raises ValueError when a neighborhood is set and its length differs
    from the dimensionality of the first argument.
    """
    if (self.neighborhood is not None and
            len(self.neighborhood) != args[0].ndim):
        raise ValueError("{} dimensional neighborhood specified for {} "
                         "dimensional input array".format(
                            len(self.neighborhood), args[0].ndim))

    array_types = tuple([typing.typeof.typeof(x) for x in args])
    if 'out' in kwargs:
        result = kwargs['out']
        rttype = numpy_support.from_dtype(result.dtype)
        result_type = types.npytypes.Array(rttype, result.ndim,
                                           numpy_support.map_layout(result))
        array_types_full = array_types + (result_type,)
    else:
        result = None
        array_types_full = array_types

    # Treat the setting as a verbosity threshold so that higher debug
    # levels keep this output instead of silencing it.
    if config.DEBUG_ARRAY_OPT >= 1:
        print("__call__", array_types, args, kwargs)

    (real_ret, typemap, calltypes) = self.get_return_type(array_types)
    new_func = self._stencil_wrapper(result, None, real_ret, typemap,
                                     calltypes, *array_types_full)

    if result is None:
        return new_func.entry_point(*args)
    else:
        return new_func.entry_point(*(args + (result,)))
def test_structure_dtype_with_titles(self):
    """
    A structured dtype whose fields carry both a title and a name must
    be accessible through either key in compiled code.
    """
    # the following is the definition of int4 vector type from pyopencl
    vecint4 = np.dtype([(('x', 's0'), 'i4'), (('y', 's1'), 'i4'),
                        (('z', 's2'), 'i4'), (('w', 's3'), 'i4')])
    nbtype = numpy_support.from_dtype(vecint4)
    self.assertEqual(len(nbtype.fields), len(vecint4.fields))

    def pyfunc(a):
        for i in range(a.size):
            j = i + 1
            a[i]['s0'] = j * 2
            a[i]['x'] += -1

            a[i]['s1'] = j * 3
            a[i]['y'] += -2

            a[i]['s2'] = j * 4
            a[i]['z'] += -3

            a[i]['s3'] = j * 5
            a[i]['w'] += -4

        return a

    template = np.zeros(10, dtype=vecint4)
    expect = pyfunc(template.copy())
    cfunc = self.get_cfunc(pyfunc, (nbtype[:],))
    got = cfunc(template.copy())
    np.testing.assert_equal(expect, got)
def test_usecase1(self):
    """
    Run ``usecase1`` both as pure Python and compiled on identical
    record arrays and check that both leave the arrays in the same state.
    """
    # This is an unaligned dtype
    mystruct_dt = numpy.dtype([('p', numpy.float64),
                               ('row', numpy.float64),
                               ('col', numpy.float64)])
    mystruct = numpy_support.from_dtype(mystruct_dt)

    cfunc = compile_isolated(usecase1, (mystruct[:], mystruct[:])).entry_point

    st1 = numpy.recarray(3, dtype=mystruct_dt)
    st2 = numpy.recarray(3, dtype=mystruct_dt)
    # Every field of both arrays is filled with 1..size.
    for st in (st1, st2):
        for field in ('p', 'row', 'col'):
            setattr(st, field, numpy.arange(st.size) + 1)

    expect1, expect2 = st1.copy(), st2.copy()
    got1, got2 = expect1.copy(), expect2.copy()

    usecase1(expect1, expect2)
    cfunc(got1, got2)

    self.assertTrue(numpy.all(expect1 == got1))
    self.assertTrue(numpy.all(expect2 == got2))
def test_multiple_args_records(self):
    """
    Passing -1 for either uint64 argument must raise OverflowError and
    must not change the reference count of the record array argument.
    """
    mystruct_dt = np.dtype([('p', np.float64),
                            ('row', np.float64),
                            ('col', np.float64)])
    mystruct = numpy_support.from_dtype(mystruct_dt)

    cres = compile_isolated(foobar,
                            [mystruct[:], types.uint64, types.uint64],
                            return_type=mystruct[:])
    cfunc = cres.entry_point

    st1 = np.recarray(3, dtype=mystruct_dt)
    ramp = np.arange(st1.size) + 1
    st1.p = ramp
    st1.row = np.arange(st1.size) + 1
    st1.col = np.arange(st1.size) + 1
    del ramp

    with self.assertRefCount(st1):
        test_fail_args = ((st1, -1, 1), (st1, 1, -1))

        for a, b, c in test_fail_args:
            with self.assertRaises(OverflowError):
                cfunc(a, b, c)

        # Release every local that still refers to st1 before the
        # reference count is compared on leaving the block.
        del test_fail_args, a, b, c
        gc.collect()
def _evaluate(self):
    """
    Evaluate the expression with a compiled function and return its result.

    Argument values are resolved from the expression environment and
    converted to arrays. Compiled functions are cached by target,
    expression text and argument types, so compilation happens only on a
    cache miss.
    """
    # Get argument values
    env = self.expr.env
    call_args = [
        np.asarray(env.resolve(name, name.startswith("__pd_eval_local_")))
        for name in self._args
    ]

    # Get argument types
    call_types = tuple(from_dtype(arg.dtype) for arg in call_args)

    # Check if the expression has already been compiled
    cache_key = (self.target, str(self.expr), call_types)
    compiled = self._func_cache.get(cache_key)
    if compiled is None:
        # Not cached. Compile new one.
        # Stringify the eval tree and get arg names
        nameset = set()
        exprstr = _stringify_eval_op_tree(self.expr.terms, nameset)
        assert set(self._args) == nameset

        compiled = self._compile(exprstr, self._args, call_types,
                                 '__numba_pandas_eval_ufunc')
        self._func_cache[cache_key] = compiled

    # Execute
    return compiled(*call_args)
def test_multiple_args_records(self):
    """
    Passing -1 for either uint64 argument must raise (OverflowError, or
    TypeError on Python 2.6) and must not change the reference count of
    the record array argument.
    """
    mystruct_dt = np.dtype([('p', np.float64),
                            ('row', np.float64),
                            ('col', np.float64)])
    mystruct = numpy_support.from_dtype(mystruct_dt)

    cres = compile_isolated(foobar,
                            [mystruct[:], types.uint64, types.uint64],
                            return_type=mystruct[:])
    cfunc = cres.entry_point

    st1 = np.recarray(3, dtype=mystruct_dt)
    st1.p = np.arange(st1.size) + 1
    st1.row = np.arange(st1.size) + 1
    st1.col = np.arange(st1.size) + 1

    # Baseline taken before any tuple below holds a reference to st1.
    old_refcnt_st1 = sys.getrefcount(st1)

    test_fail_args = ((st1, -1, 1), (st1, 1, -1))

    # TypeError is for 2.6
    if sys.version_info >= (2, 7):
        exc_type = OverflowError
    else:
        exc_type = TypeError

    for a, b, c in test_fail_args:
        with self.assertRaises(exc_type):
            cfunc(a, b, c)

    del test_fail_args, a, b, c
    gc.collect()
    self.assertEqual(sys.getrefcount(st1), old_refcnt_st1)
def resolve_value_type(self, val):
    """
    Return the numba type of a Python value.

    Data values are tried first through ``resolve_data_type``; external
    functions, exception classes and NumPy dtypes are handled next, and
    finally target-specific global typing is consulted.

    Return None if fail to type.
    """
    data_type = self.resolve_data_type(val)
    if data_type is not None:
        return data_type

    if isinstance(val, types.ExternalFunction):
        return val

    if isinstance(val, type) and issubclass(val, BaseException):
        return types.ExceptionType(val)

    if isinstance(val, numpy.dtype):
        return types.DType(numpy_support.from_dtype(val))

    try:
        # Try to look up target specific typing information
        return self.get_global_type(val)
    except KeyError:
        return None
def test_multiple_args_records(self):
    """
    Calling the compiled function with invalid values for its uint64
    arguments must raise (OverflowError, or TypeError on Python 2.6).
    """
    mystruct_dt = np.dtype([('p', np.float64),
                            ('row', np.float64),
                            ('col', np.float64)])
    mystruct = numpy_support.from_dtype(mystruct_dt)

    cres = compile_isolated(foobar,
                            [mystruct[:], types.uint64, types.uint64],
                            return_type=mystruct[:])
    cfunc = cres.entry_point

    st1 = np.recarray(3, dtype=mystruct_dt)
    st2 = np.recarray(3, dtype=mystruct_dt)
    for st in (st1, st2):
        for field in ('p', 'row', 'col'):
            setattr(st, field, np.arange(st.size) + 1)

    test_fail_args = ((st1, -1, st2), (st1, st2, -1))

    # TypeError is for 2.6
    exc_type = OverflowError if sys.version_info >= (2, 7) else TypeError

    # NOTE(review): the loop runs inside a single assertRaises block, so
    # the block exits on the first exception and the second argument
    # tuple is never exercised -- confirm whether that is intended.
    with self.assertRaises(exc_type):
        for a, b, c in test_fail_args:
            cfunc(a, b, c)
def _test_shared(self, arr):
    """
    Copy *arr* through a shared-memory kernel and check the round trip.
    """
    # Use a kernel that copies via shared memory to check loading and
    # storing different dtypes with shared memory. All threads in a block
    # collaborate to load in values, then the output values are written
    # only by the first thread in the block after synchronization.

    nthreads = 16
    nblocks = int(len(arr) / nthreads)
    dt = nps.from_dtype(arr.dtype)

    @cuda.jit
    def use_sm_chunk_copy(x, y):
        sm = cuda.shared.array(nthreads, dtype=dt)

        tx = cuda.threadIdx.x
        bx = cuda.blockIdx.x
        bd = cuda.blockDim.x

        # Load this block's chunk into shared
        i = bx * bd + tx
        if i < len(x):
            sm[tx] = x[i]

        cuda.syncthreads()

        # One thread per block writes this block's chunk
        if tx == 0:
            for j in range(nthreads):
                y[bd * bx + j] = sm[j]

    d_result = cuda.device_array_like(arr)
    use_sm_chunk_copy[nblocks, nthreads](arr, d_result)

    np.testing.assert_array_equal(arr, d_result.copy_to_host())
def transpose(a, b=None):
    """Compute the transpose of 'a' and store it into 'b', if given,
    and return it. If 'b' is not given, allocate a new array
    and return that.

    This implements the algorithm documented in
    http://devblogs.nvidia.com/parallelforall/efficient-matrix-transpose-cuda-cc/

    :param a: an `np.ndarray` or a `DeviceNDArrayBase` subclass. If already on
        the device its stream will be used to perform the transpose (and to copy
        `b` to the device if necessary).
    :param b: optional output array. When it is ``None`` a new device array
        of shape ``(a.shape[1], a.shape[0])`` is allocated.
    :return: `b`, or the newly allocated array when `b` was not given.
    """

    # prefer `a`'s stream if it has one, otherwise use the default stream (0)
    stream = getattr(a, 'stream', 0)

    # Test for absence explicitly: the truth value of an array-like `b` is
    # either ambiguous (raises for multi-element ndarrays) or depends on its
    # contents, neither of which says whether the caller supplied an output.
    if b is None:
        cols, rows = a.shape
        strides = a.dtype.itemsize * cols, a.dtype.itemsize
        b = cuda.cudadrv.devicearray.DeviceNDArray((rows, cols),
                                                   strides,
                                                   dtype=a.dtype,
                                                   stream=stream)

    dt = nps.from_dtype(a.dtype)

    tpb = driver.get_device().MAX_THREADS_PER_BLOCK
    # we need to factor available threads into x and y axis
    tile_width = int(math.pow(2, math.log(tpb, 2) / 2))
    tile_height = int(tpb / tile_width)

    # The tile is one column wider than the thread block.
    tile_shape = (tile_height, tile_width + 1)

    @cuda.jit
    def kernel(input, output):

        tile = cuda.shared.array(shape=tile_shape, dtype=dt)

        tx = cuda.threadIdx.x
        ty = cuda.threadIdx.y
        bx = cuda.blockIdx.x * cuda.blockDim.x
        by = cuda.blockIdx.y * cuda.blockDim.y
        x = by + tx
        y = bx + ty

        if by + ty < input.shape[0] and bx + tx < input.shape[1]:
            tile[ty, tx] = input[by + ty, bx + tx]
        cuda.syncthreads()
        if y < output.shape[0] and x < output.shape[1]:
            output[y, x] = tile[tx, ty]

    # one block per tile, plus one for remainders
    blocks = int(b.shape[0] / tile_height + 1), int(b.shape[1] / tile_width + 1)
    # one thread per tile element
    threads = tile_height, tile_width
    kernel[blocks, threads, stream](a, b)

    return b
def resolve_value_type(self, val):
    """
    Return the numba type of a Python value.

    Handles booleans, Python numbers, None, strings, tuples, NumPy array
    scalars and arrays, ctypes/cffi functions and exception classes, then
    falls back to target-specific global typing.

    Return None if fail to type.
    """
    if val is True or val is False:
        return types.boolean

    if isinstance(val, utils.INT_TYPES + (float,)):
        return self.get_number_type(val)

    if val is None:
        return types.none

    if isinstance(val, str):
        return types.string

    if isinstance(val, complex):
        return types.complex128

    if isinstance(val, tuple):
        member_types = [self.resolve_value_type(v) for v in val]
        # A tuple whose members all share one type is homogeneous.
        if len(set(member_types)) == 1:
            return types.UniTuple(member_types[0], len(member_types))
        return types.Tuple(member_types)

    if numpy_support.is_arrayscalar(val):
        return numpy_support.map_arrayscalar_type(val)

    if numpy_support.is_array(val):
        element_type = numpy_support.from_dtype(val.dtype)
        # Force C contiguous
        return types.Array(element_type, val.ndim, 'C')

    if ctypes_utils.is_ctypes_funcptr(val):
        return ctypes_utils.make_function_type(val)

    if cffi_utils.SUPPORTED and cffi_utils.is_cffi_func(val):
        return cffi_utils.make_function_type(val)

    if (cffi_utils.SUPPORTED and
            isinstance(val, cffi_utils.ExternCFunction)):
        return val

    if type(val) is type and issubclass(val, BaseException):
        return types.exception_type

    try:
        # Try to look up target specific typing information
        return self.get_global_type(val)
    except KeyError:
        return None
def _compile(self, dtype):
    """
    Return the reduction kernel for *dtype*, building it on first use.

    Kernels are cached under the pair (functor, dtype).
    """
    key = self._functor, dtype
    if key not in self._cache:
        self._cache[key] = _gpu_reduce_factory(self._functor,
                                               from_dtype(dtype))
    return self._cache[key]
def _test_rec_read(self, v, pyfunc, f):
    """
    Store *v* in field *f* of a sample record, let the compiled *pyfunc*
    copy it into a one-element array, and check that the array holds *v*.
    """
    rec = self.sample1d.copy()[0]
    rec[f] = v

    out = np.zeros(1, v.dtype)
    cfunc = self.get_cfunc(pyfunc, (numpy_support.from_dtype(recordtype),))
    cfunc(rec, out)

    np.testing.assert_equal(out[0], v)
def test_records(self):
    """
    Check pickling of a record type and of a 1-d array of that record type.
    """
    # The builtin ``str`` is used for the fixed-width string field: the
    # ``np.str`` alias referred to the same object and is not available in
    # newer NumPy releases.
    recordtype = np.dtype([('a', np.float64),
                           ('b', np.int32),
                           ('c', np.complex64),
                           ('d', (str, 5))])
    ty = numpy_support.from_dtype(recordtype)
    self.check_pickling(ty)
    self.check_pickling(types.Array(ty, 1, 'A'))
def test_record_dtype_with_titles_roundtrip(self):
    """
    A record dtype with a titled field must keep its title information in
    the Numba type and convert back to an equal NumPy dtype.
    """
    # ``np.float64`` is spelled out: the ``np.float`` alias stood for the
    # builtin float and is not available in newer NumPy releases.
    recdtype = np.dtype([(("title a", 'a'), np.float64), ('b', np.float64)])
    nbtype = numpy_support.from_dtype(recdtype)
    self.assertTrue(nbtype.is_title('title a'))
    self.assertFalse(nbtype.is_title('a'))
    self.assertFalse(nbtype.is_title('b'))
    got = numpy_support.as_dtype(nbtype)
    # assertTrue(got, recdtype) would treat recdtype as the failure message
    # and pass for any truthy dtype; compare the two dtypes instead.
    self.assertEqual(got, recdtype)
def check(dtype, fields, size, aligned):
    """
    Convert *dtype* and verify that the result is a Record type with the
    given dtype, fields, size and alignment flag.
    """
    tp = numpy_support.from_dtype(dtype)
    self.assertIsInstance(tp, types.Record)
    # Only check for dtype equality, as the Numba type may be interned
    expectations = (
        ('dtype', dtype),
        ('fields', fields),
        ('size', size),
        ('aligned', aligned),
    )
    for attr, wanted in expectations:
        self.assertEqual(getattr(tp, attr), wanted)
def _typeof_ndarray(val, c):
    """
    Return the Numba array type describing the array *val*.

    The type records the element type, dimensionality, layout and whether
    the array is read-only. Raises ValueError when the array's dtype is
    not supported.
    """
    try:
        element_type = numpy_support.from_dtype(val.dtype)
    except NotImplementedError:
        raise ValueError("Unsupported array dtype: %s" % (val.dtype,))

    return types.Array(
        element_type,
        val.ndim,
        numpy_support.map_layout(val),
        readonly=not val.flags.writeable,
    )
def _test_rec_read(self, v, pyfunc, f):
    """
    Store *v* in field *f* of a sample record, let the compiled *pyfunc*
    copy it into a one-element array, and check that the array holds *v*.
    """
    rec = self.sample1d.copy()[0]
    rec[f] = v

    out = np.zeros(1, v.dtype)
    cfunc = self.get_cfunc(pyfunc, (numpy_support.from_dtype(recordtype),))
    cfunc(rec, out)

    np.testing.assert_equal(out[0], v)
def _typeof_ndarray(val, c):
    """
    Return the Numba array type describing the array *val*.

    The type records the element type, dimensionality, layout and whether
    the array is read-only. Returns None when the array's dtype is not
    supported.
    """
    try:
        element_type = numpy_support.from_dtype(val.dtype)
    except NotImplementedError:
        return

    return types.Array(
        element_type,
        val.ndim,
        numpy_support.map_layout(val),
        readonly=not val.flags.writeable,
    )
def find_common_dtype_from_numpy_dtypes(array_types, scalar_types):
    """Used to find common numba dtype for a sequences of numba dtypes each representing some numpy dtype

    Both sequences are converted to NumPy dtypes, combined with
    ``numpy.find_common_type`` and the result is converted back to a
    numba dtype.
    """
    # NOTE(review): numpy.find_common_type is reported as deprecated in
    # recent NumPy releases -- confirm against the supported NumPy range.
    np_common_dtype = numpy.find_common_type(
        [numpy_support.as_dtype(tp) for tp in array_types],
        [numpy_support.as_dtype(tp) for tp in scalar_types],
    )
    return numpy_support.from_dtype(np_common_dtype)
def _test_get_two_equal(self, pyfunc):
    """
    Test with two arrays of the same type.

    The compiled function must agree with *pyfunc* for every index of the
    reference sample.
    """
    rec = numpy_support.from_dtype(recordtype)
    cfunc = self.get_cfunc(pyfunc, (rec[:], rec[:], types.intp))

    for idx in range(self.refsample1d.size):
        expected = pyfunc(self.refsample1d, self.refsample1d3, idx)
        got = cfunc(self.nbsample1d, self.nbsample1d3, idx)
        self.assertEqual(expected, got)
def resolve_data_type(self, val):
    """
    Return the numba type of a Python value representing data
    (e.g. a number or an array, but not more sophisticated types
    such as functions, etc.)

    This function can return None if it cannot decide.
    """
    if val is True or val is False:
        return types.boolean

    # Under 2.x, we must guard against numpy scalars (np.intXY
    # subclasses Python int but get_number_type() wouldn't infer the
    # right bit width -- perhaps it should?).
    is_python_number = isinstance(val, utils.INT_TYPES + (float,))
    if is_python_number and not isinstance(val, numpy.number):
        return self.get_number_type(val)

    if val is None:
        return types.none

    if isinstance(val, str):
        return types.string

    if isinstance(val, complex):
        return types.complex128

    if isinstance(val, tuple):
        member_types = [self.resolve_value_type(v) for v in val]
        # A tuple whose members all share one type is homogeneous.
        if len(set(member_types)) == 1:
            return types.UniTuple(member_types[0], len(member_types))
        return types.Tuple(member_types)

    try:
        return numpy_support.map_arrayscalar_type(val)
    except NotImplementedError:
        pass

    if not numpy_support.is_array(val):
        return

    try:
        element_type = numpy_support.from_dtype(val.dtype)
    except NotImplementedError:
        return

    # C order takes precedence when an array is contiguous both ways.
    if val.flags.c_contiguous:
        layout = 'C'
    else:
        layout = 'F' if val.flags.f_contiguous else 'A'
    return types.Array(element_type, val.ndim, layout)
def resolve_data_type(self, val):
    """
    Return the numba type of a Python value representing data
    (e.g. a number or an array, but not more sophisticated types
    such as functions, etc.)

    This function can return None if it cannot decide.
    """
    if val is True or val is False:
        return types.boolean

    # Under 2.x, we must guard against numpy scalars (np.intXY
    # subclasses Python int but get_number_type() wouldn't infer the
    # right bit width -- perhaps it should?).
    is_python_number = isinstance(val, utils.INT_TYPES + (float, ))
    if is_python_number and not isinstance(val, numpy.number):
        return self.get_number_type(val)

    if val is None:
        return types.none

    if isinstance(val, str):
        return types.string

    if isinstance(val, complex):
        return types.complex128

    if isinstance(val, tuple):
        member_types = [self.resolve_value_type(v) for v in val]
        # A tuple whose members all share one type is homogeneous.
        if len(set(member_types)) == 1:
            return types.UniTuple(member_types[0], len(member_types))
        return types.Tuple(member_types)

    try:
        return numpy_support.map_arrayscalar_type(val)
    except NotImplementedError:
        pass

    if not numpy_support.is_array(val):
        return

    try:
        element_type = numpy_support.from_dtype(val.dtype)
    except NotImplementedError:
        return

    # C order takes precedence when an array is contiguous both ways.
    if val.flags.c_contiguous:
        layout = 'C'
    else:
        layout = 'F' if val.flags.f_contiguous else 'A'
    return types.Array(element_type, val.ndim, layout)
def binary_ufunc_test(self, ufunc_name, x_operands=None, y_operands=None,
                      flags=enable_pyobj_flags):
    """
    Compare the compiled ``<ufunc_name>_usecase`` function with its
    pure-Python run for int32, int64, float32 and float64 1-d arrays.

    *x_operands* / *y_operands* optionally override the default operand
    arrays (one per array type, in the order listed above). Positions
    holding NaN must match between both results, and the remaining values
    are compared with ``np.allclose`` for float results and exactly
    otherwise.
    """
    ufunc = globals()[ufunc_name + '_usecase']

    arraytypes = [types.Array(types.int32, 1, 'C'),
                  types.Array(types.int64, 1, 'C'),
                  types.Array(types.float32, 1, 'C'),
                  types.Array(types.float64, 1, 'C')]

    # Identity test: "operand list not supplied" must not depend on how
    # the supplied object implements equality.
    if x_operands is None:
        x_operands = [np.arange(-10, 10, dtype='i4'),
                      np.arange(-10, 10, dtype='i8'),
                      np.arange(-1, 1, 0.1, dtype='f4'),
                      np.arange(-1, 1, 0.1, dtype='f8')]

    if y_operands is None:
        y_operands = [np.arange(-10, 10, dtype='i4'),
                      np.arange(-10, 10, dtype='i8'),
                      np.arange(-1, 1, 0.1, dtype='f4'),
                      np.arange(-1, 1, 0.1, dtype='f8')]

    for arraytype, x_operand, y_operand in zip(arraytypes, x_operands,
                                               y_operands):
        pyfunc = ufunc

        # The NumPy ufunc decides the dtype of the output array.
        numpy_ufunc = getattr(np, ufunc_name)
        result_dtype = numpy_ufunc(x_operand, y_operand).dtype
        result_arraytype = types.Array(from_dtype(result_dtype),
                                       arraytype.ndim, arraytype.layout)

        cr = compile_isolated(pyfunc,
                              (arraytype, arraytype, result_arraytype),
                              flags=flags)
        cfunc = cr.entry_point

        result = np.zeros(x_operand.size, dtype=result_dtype)
        cfunc(x_operand, y_operand, result)
        expected = np.zeros(x_operand.size, dtype=result_dtype)
        ufunc(x_operand, y_operand, expected)

        # Need special checks if NaNs are in results
        if np.isnan(expected).any() or np.isnan(result).any():
            self.assertTrue(np.allclose(np.isnan(result),
                                        np.isnan(expected)))
            if not np.isnan(expected).all() and not np.isnan(result).all():
                if result_dtype.kind == 'f':
                    self.assertTrue(np.allclose(
                        result[np.invert(np.isnan(result))],
                        expected[np.invert(np.isnan(expected))]))
                else:
                    self.assertTrue(
                        (result[np.invert(np.isnan(result))] ==
                         expected[np.invert(np.isnan(expected))]).all())
        else:
            if result_dtype.kind == 'f':
                self.assertTrue(np.allclose(result, expected))
            else:
                self.assertTrue((result == expected).all())
def _compile(cls, dtype):
    """
    Build the CUDA kernel that collects unique values of an array of
    *dtype*, store it in ``cls._cached_kernels`` under *dtype* and
    return it.
    """
    element_type = numpy_support.from_dtype(dtype)

    @cuda.jit
    def gpu_unique_k(arr, k, out, outsz_ptr):
        """
        Note: run with small blocks.
        """
        tid = cuda.threadIdx.x
        blksz = cuda.blockDim.x
        base = 0

        # shared memory
        vset_size = 0
        sm_mem_size = MAX_FAST_UNIQUE_K
        vset = cuda.shared.array(sm_mem_size, dtype=element_type)
        share_vset_size = cuda.shared.array(1, dtype=int32)
        share_loaded = cuda.shared.array(sm_mem_size, dtype=element_type)
        sm_mem_size = min(k, sm_mem_size)

        while vset_size < sm_mem_size and base < arr.size:
            pos = base + tid
            valid_load = min(blksz, arr.size - base)
            # load
            if tid < valid_load:
                share_loaded[tid] = arr[pos]
            # wait for load to complete
            cuda.syncthreads()
            # thread-0 inserts
            if tid == 0:
                for i in range(valid_load):
                    val = share_loaded[i]
                    new_size = gpu_unique_set_insert(vset, vset_size, val)
                    if new_size >= 0:
                        vset_size = new_size
                    else:
                        vset_size = sm_mem_size + 1
                share_vset_size[0] = vset_size
            # wait until the insert is done
            cuda.syncthreads()
            vset_size = share_vset_size[0]
            # increment
            base += blksz

        # output
        if vset_size <= sm_mem_size:
            for i in range(tid, vset_size, blksz):
                out[i] = vset[i]
            if tid == 0:
                outsz_ptr[0] = vset_size
        else:
            # A size of -1 is written when the set size exceeded the limit.
            outsz_ptr[0] = -1

    # cache
    cls._cached_kernels[dtype] = gpu_unique_k
    return gpu_unique_k
def _check_device_record(self, reference, rec):
    """
    Assert that the device record *rec* is a scalar (empty shape and
    strides), matches *reference* in dtype and allocation size, has
    device data behind it, and reports the matching numba type.
    """
    ref_dtype = reference.dtype

    for attr, wanted in (('shape', tuple()),
                         ('strides', tuple()),
                         ('dtype', ref_dtype),
                         ('alloc_size', ref_dtype.itemsize)):
        self.assertEqual(getattr(rec, attr), wanted)

    self.assertIsNotNone(rec.gpu_data)
    self.assertNotEqual(rec.device_ctypes_pointer, ctypes.c_void_p(0))

    self.assertEqual(rec._numba_type_, numpy_support.from_dtype(ref_dtype))
def _type_and_size(self, dary, size):
    """
    Return ``(numba_type, size)`` for the array *dary*.

    When *size* is None the full array size is used. Raises ValueError
    when *size* exceeds the array size.
    """
    nbtype = from_dtype(dary.dtype)
    # Use the array size if the `size` is not defined
    count = dary.size if size is None else size
    if count > dary.size:
        raise ValueError("size > array.size")
    return nbtype, count
def test_from_dtype(self):
    """
    The record type built from ``recordtype`` must report the expected
    type and offset for each of the fields 'a'..'d' and the same total
    size as the NumPy dtype.
    """
    rec = numpy_support.from_dtype(recordtype)

    expected_types = (
        ('a', types.float64),
        ('b', types.int16),
        ('c', types.complex64),
        ('d', types.UnicodeCharSeq(5)),
    )
    for name, wanted in expected_types:
        self.assertEqual(rec.typeof(name), wanted)

    # The second entry of each NumPy field description is used as offset.
    for name in 'abcd':
        self.assertEqual(rec.offset(name), recordtype.fields[name][1])

    self.assertEqual(recordtype.itemsize, rec.size)
def typeof_type(val, c):
    """
    Type various specific Python types.

    Exception classes, named-tuple classes (tuple subclasses that have
    ``_asdict``) and NumPy scalar classes are recognised, in that order;
    any other class yields None.
    """
    if issubclass(val, BaseException):
        return types.ExceptionClass(val)

    is_namedtuple = issubclass(val, tuple) and hasattr(val, "_asdict")
    if is_namedtuple:
        return types.NamedTupleClass(val)

    if issubclass(val, np.generic):
        scalar_type = numpy_support.from_dtype(val)
        return types.NumberClass(scalar_type)
def test_array_1d_record(self, flags=force_pyobj_flags):
    """
    Run ``record_iter_usecase`` on a 1-d record array, compiled and as
    pure Python, and require precisely equal results.
    """
    item_type = numpy_support.from_dtype(record_dtype)
    array_type = types.Array(item_type, 1, 'A')
    cfunc = compile_isolated(record_iter_usecase, (array_type,),
                             flags=flags).entry_point

    arr = np.recarray(3, dtype=record_dtype)
    for idx in range(3):
        arr[idx].a = float(idx * 2)
        arr[idx].b = idx + 2

    expected = record_iter_usecase(arr)
    self.assertPreciseEqual(cfunc(arr), expected)
def _test_set_equal(self, pyfunc, value, valuetype):
    """
    Apply *pyfunc* with *value* at every index, once as pure Python on a
    copy of the reference sample and once compiled on a copy of the
    numba sample, and compare the resulting arrays.
    """
    rec = numpy_support.from_dtype(recordtype)
    cfunc = self.get_cfunc(pyfunc, (rec[:], types.intp, valuetype))

    for idx in range(self.refsample1d.size):
        expect = self.refsample1d.copy()
        pyfunc(expect, idx, value)

        got = self.nbsample1d.copy()
        cfunc(got, idx, value)

        # Match the entire array to ensure no memory corruption
        np.testing.assert_equal(expect, got)
def test_ufunc(self):
    """
    Check a CUDA-targeted ufunc that subtracts day-resolution datetimes
    against the equivalent NumPy subtraction.
    """
    datetime_t = from_dtype(np.dtype('datetime64[D]'))
    signature = (datetime_t, datetime_t)

    @vectorize([signature], target='cuda')
    def timediff(start, end):
        return end - start

    starts = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
    # Random day offsets are added to produce the end dates.
    ends = starts + np.random.randint(0, 10000, starts.size)

    self.assertPreciseEqual(timediff(starts, ends), ends - starts)
def unary_ufunc_test(self, ufunc_name, operands=None,
                     flags=enable_pyobj_flags):
    """
    Compare the compiled ``<ufunc_name>_usecase`` function with its
    pure-Python run for int32, int64, float32 and float64 1-d arrays.

    *operands* optionally overrides the default operand arrays (one per
    array type, in the order listed above). Positions holding NaN must
    match between both results; the remaining values must be equal or
    close according to ``np.allclose``.
    """
    ufunc = globals()[ufunc_name + '_usecase']

    arraytypes = [
        types.Array(types.int32, 1, 'C'),
        types.Array(types.int64, 1, 'C'),
        types.Array(types.float32, 1, 'C'),
        types.Array(types.float64, 1, 'C')
    ]

    # Identity test: "operand list not supplied" must not depend on how
    # the supplied object implements equality.
    if operands is None:
        operands = [
            np.arange(-10, 10, dtype='i4'),
            np.arange(-10, 10, dtype='i8'),
            np.arange(-1, 1, 0.1, dtype='f4'),
            np.arange(-1, 1, 0.1, dtype='f8')
        ]

    for arraytype, operand in zip(arraytypes, operands):
        pyfunc = ufunc

        # The NumPy ufunc decides the dtype of the output array.
        numpy_ufunc = getattr(np, ufunc_name)
        result_dtype = numpy_ufunc(operand).dtype
        result_arraytype = types.Array(from_dtype(result_dtype),
                                       arraytype.ndim, arraytype.layout)

        cr = compile_isolated(pyfunc, (arraytype, result_arraytype),
                              flags=flags)
        cfunc = cr.entry_point

        result = np.zeros(operand.size, dtype=result_dtype)
        cfunc(operand, result)
        expected = np.zeros(operand.size, dtype=result_dtype)
        ufunc(operand, expected)

        # Need special checks if NaNs are in results
        if np.isnan(expected).any() or np.isnan(result).any():
            self.assertTrue(
                np.allclose(np.isnan(result), np.isnan(expected)))
            if not np.isnan(expected).all() and not np.isnan(result).all():
                self.assertTrue(
                    np.allclose(result[np.invert(np.isnan(result))],
                                expected[np.invert(np.isnan(expected))]))
        else:
            self.assertTrue(
                np.all(result == expected) or np.allclose(result, expected))
def test_record_read_2d_array(self):
    """
    Test reading from a 2D array within a structured type.

    Field 'j' of the sample record is filled with known values, and the
    compiled function must copy them into a zeroed (3, 2) array.
    """
    rec = self.samplerec2darr.copy()
    values = np.asarray([5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
                        np.float32).reshape(3, 2)
    rec['j'][:] = values

    nbrecord = numpy_support.from_dtype(recordwith2darray)
    cfunc = self.get_cfunc(record_read_2d_array, (nbrecord, ))

    out = np.zeros((3, 2), dtype=rec['j'].dtype)
    cfunc(rec, out)
    np.testing.assert_equal(rec['j'], out)
def test_record_read_1d_array(self):
    """
    Test reading from a 1D array within a structured type.

    Field 'h' of the sample record is set to known values, and the
    compiled function must copy them into a zeroed two-element array.
    """
    rec = self.samplerec1darr.copy()
    for pos, value in enumerate((4.0, 5.0)):
        rec['h'][pos] = value

    nbrecord = numpy_support.from_dtype(recordwitharray)
    cfunc = self.get_cfunc(record_read_array, (nbrecord, ))

    out = np.zeros(2, dtype=rec['h'].dtype)
    cfunc(rec, out)
    np.testing.assert_equal(rec['h'], out)
def test_record_write_array(self):
    """
    Testing writing to a 1D array within a structured type.

    The compiled function writes into the first record of a fresh record
    array, which must then equal a record array filled by hand.
    """
    nbval = np.recarray(1, dtype=recordwitharray)
    nbrecord = numpy_support.from_dtype(recordwitharray)
    cfunc = self.get_cfunc(record_write_array, (nbrecord, ))
    cfunc(nbval[0])

    expected = np.recarray(1, dtype=recordwitharray)
    expected[0].g = 2
    for pos, value in enumerate((3.0, 4.0)):
        expected[0].h[pos] = value

    np.testing.assert_equal(expected, nbval)
def test_record_write_2d_array(self):
    """
    Test writing to a 2D array within a structured type.

    The compiled function writes into the first record of a fresh record
    array, which must then equal a record array filled by hand.
    """
    nbval = np.recarray(1, dtype=recordwith2darray)
    nbrecord = numpy_support.from_dtype(recordwith2darray)
    cfunc = self.get_cfunc(record_write_2d_array, (nbrecord, ))
    cfunc(nbval[0])

    expected = np.recarray(1, dtype=recordwith2darray)
    expected[0].i = 3
    values = np.asarray([5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
                        np.float32).reshape(3, 2)
    expected[0].j[:] = values

    np.testing.assert_equal(expected, nbval)