def _check_device_record(self, reference, rec):
    """Assert that device record ``rec`` is consistent with ``reference``.

    Checks that ``rec`` has empty shape and strides, the dtype and
    allocation size implied by ``reference.dtype``, a non-``None``
    ``gpu_data``, a non-null device pointer, and the Numba type derived
    from ``reference.dtype``.
    """
    self.assertEqual(rec.shape, tuple())
    self.assertEqual(rec.strides, tuple())
    self.assertEqual(rec.dtype, reference.dtype)
    self.assertEqual(rec.alloc_size, reference.dtype.itemsize)
    self.assertIsNotNone(rec.gpu_data)
    # ctypes instances compare by identity, not by value, so comparing the
    # pointer against a freshly built ``ctypes.c_void_p(0)`` could never
    # fail. Inspect the address instead: a null ``c_void_p`` reports
    # ``None`` and integer-like pointer types report ``0``, both falsy.
    ptr = rec.device_ctypes_pointer
    self.assertTrue(getattr(ptr, "value", ptr), "device pointer is null")
    numba_type = numpy_support.from_dtype(reference.dtype)
    self.assertEqual(rec._numba_type_, numba_type)
def _find_common_type_compat(array_dtypes, scalar_dtypes):
    """Approximate the legacy ``numpy.find_common_type`` via ``promote_types``.

    Only used when ``numpy.find_common_type`` is unavailable. The array
    dtypes and the scalar dtypes are each promoted to a single dtype; the
    scalar result only influences the answer when its kind ranks higher
    than the array kind.
    """
    # Kind ranking intended to mirror the legacy NumPy implementation.
    kind_order = "buifcSUVOMm"

    def promote_all(dtypes):
        promoted = None
        for dt in dtypes:
            if promoted is None:
                promoted = dt
            else:
                promoted = numpy.promote_types(promoted, dt)
        return promoted

    max_array = promote_all(array_dtypes)
    max_scalar = promote_all(scalar_dtypes)
    if max_array is None:
        return max_scalar
    if max_scalar is None:
        return max_array
    if kind_order.find(max_scalar.kind) > kind_order.find(max_array.kind):
        return numpy.promote_types(max_array, max_scalar)
    return max_array


def find_common_dtype_from_numpy_dtypes(array_types, scalar_types):
    """Find the common numba dtype for sequences of numba dtypes, each
    representing some numpy dtype.

    Parameters
    ----------
    array_types : iterable of numba types treated as array dtypes
    scalar_types : iterable of numba types treated as scalar dtypes

    Returns
    -------
    The numba type corresponding to the common numpy dtype.
    """
    np_array_dtypes = [numpy_support.as_dtype(dtype) for dtype in array_types]
    np_scalar_dtypes = [numpy_support.as_dtype(dtype) for dtype in scalar_types]

    # ``numpy.find_common_type`` is deprecated since NumPy 1.25 and removed
    # in NumPy 2.0; keep using it where it exists so results are unchanged
    # there, and fall back to a promotion-based equivalent otherwise.
    legacy_find_common = getattr(numpy, "find_common_type", None)
    if legacy_find_common is not None:
        np_common_dtype = legacy_find_common(np_array_dtypes, np_scalar_dtypes)
    else:
        np_common_dtype = _find_common_type_compat(
            np_array_dtypes, np_scalar_dtypes
        )

    return numpy_support.from_dtype(np_common_dtype)
def test_records(self):
    """Check pickling of a record type and of a 1-D array of that type."""
    recordtype = np.dtype([
        ("a", np.float64),
        ("b", np.int32),
        ("c", np.complex64),
        # ``np.str`` was a deprecated alias of the builtin ``str`` and has
        # been removed from NumPy (1.24); ``np.str_`` yields the same
        # fixed-width unicode field.
        ("d", (np.str_, 5)),
    ])
    ty = numpy_support.from_dtype(recordtype)
    self.check_pickling(ty)
    self.check_pickling(types.Array(ty, 1, "A"))
def _test_get_two_equal(self, pyfunc):
    """
    Compare *pyfunc* against its compiled version when both array
    arguments have the same record type.
    """
    record_ty = numpy_support.from_dtype(recordtype)
    arg_types = (record_ty[:], record_ty[:], types.intp)
    cfunc = self.get_cfunc(pyfunc, arg_types)

    # Exercise every index of the reference sample.
    for idx in range(self.refsample1d.size):
        expected = pyfunc(self.refsample1d, self.refsample1d3, idx)
        got = cfunc(self.nbsample1d, self.nbsample1d3, idx)
        self.assertEqual(expected, got)
def test_array_1d_record(self, flags=force_pyobj_flags):
    """
    Run ``record_iter_usecase`` on a 1-D record array and check that the
    compiled result precisely equals the pure-Python result.
    """
    pyfunc = record_iter_usecase
    array_type = types.Array(numpy_support.from_dtype(record_dtype), 1, "A")
    cfunc = compile_isolated(pyfunc, (array_type,), flags=flags).entry_point

    # Fill the three records with index-derived values.
    arr = np.recarray(3, dtype=record_dtype)
    for idx in range(3):
        arr[idx].a = float(idx * 2)
        arr[idx].b = idx + 2

    expected = pyfunc(arr)
    self.assertPreciseEqual(cfunc(arr), expected)
def test_from_dtype(self):
    """
    Check field types, field offsets and total size of the record type
    built from ``recordtype``.
    """
    rec = numpy_support.from_dtype(recordtype)

    expected_types = (
        ('a', types.float64),
        ('b', types.int16),
        ('c', types.complex64),
        ('d', types.UnicodeCharSeq(5)),
    )
    for name, expected in expected_types:
        self.assertEqual(rec.typeof(name), expected)

    # Offsets must agree with the ones recorded in the NumPy dtype.
    for name in ('a', 'b', 'c', 'd'):
        self.assertEqual(rec.offset(name), recordtype.fields[name][1])

    self.assertEqual(recordtype.itemsize, rec.size)
def test_from_dtype(self):
    """
    Verify that the record type derived from ``recordtype`` reports the
    expected member types, member offsets and overall size.
    """
    record = numpy_support.from_dtype(recordtype)

    member_types = {
        "a": types.float64,
        "b": types.int16,
        "c": types.complex64,
        "d": types.UnicodeCharSeq(5),
    }
    for member, nb_type in member_types.items():
        self.assertEqual(record.typeof(member), nb_type)

    # Each offset is compared with the one stored in the NumPy dtype.
    for member in member_types:
        numpy_offset = recordtype.fields[member][1]
        self.assertEqual(record.offset(member), numpy_offset)

    self.assertEqual(recordtype.itemsize, record.size)
def test_getitem_static_int_index(self):
    """
    Check the ``getitem_0`` .. ``getitem_2`` use cases, then check that
    compiling ``getitem_10`` raises a ``TypingError`` about the index.
    """
    for usecase in (getitem_0, getitem_1, getitem_2):
        self._test_get_equal(usecase)

    # Compiling with an out-of-range integer index is expected to raise
    record_ty = numpy_support.from_dtype(recordtype)
    with self.assertRaises(TypingError) as raises:
        self.get_cfunc(getitem_10, (record_ty[:], types.intp))

    self.assertIn("Requested index 10 is out of range", str(raises.exception))
def unify(self, typingctx, other):
    """
    Unify this number type with *other* using Numpy's promotion rules.

    Returns ``None`` when *other* is not a ``Number``.
    """
    from numba.np import numpy_support

    if not isinstance(other, Number):
        return None

    # XXX: this can produce unsafe conversions,
    # e.g. would unify {int64, uint64} to float64
    promoted = np.promote_types(
        numpy_support.as_dtype(self),
        numpy_support.as_dtype(other),
    )
    return numpy_support.from_dtype(promoted)
def _masked_array_type_from_col(col):
    """
    Return the numba type describing the data of ``col``.

    Without a mask this is a contiguous 1-D array type whose element type
    corresponds to ``col.dtype``. With a mask it is a tuple of that array
    type and a contiguous 1-D array of ``libcudf_bitmask_type``.
    """
    element_ty = numpy_support.from_dtype(col.dtype)
    if col.mask is not None:
        return Tuple((element_ty[::1], libcudf_bitmask_type[::1]))
    return element_ty[::1]
def _make_dtype_object(typingctx, desc):
    """Given a literal or NumberClass description *desc*, return the
    signature and code generator producing the dtype object.

    Returns ``None`` when *desc* is neither of those.
    """
    if isinstance(desc, types.Literal):
        spec = desc.literal_value
    elif isinstance(desc, types.functions.NumberClass):
        spec = str(desc.dtype)
    else:
        return None

    # Convert the description into np.dtype then to numba type.
    nb_type = from_dtype(np.dtype(spec))
    sig = types.DType(nb_type)(desc)

    def codegen(context, builder, signature, args):
        # All dtype objects are dummy values in LLVM.
        # They only exist in the type level.
        return context.get_dummy_value()

    return sig, codegen
def _test_set_equal(self, pyfunc, value, valuetype):
    """
    Apply *pyfunc* and its compiled version to copies of the sample
    arrays at every index and check that the resulting arrays are equal.
    """
    record_ty = numpy_support.from_dtype(recordtype)
    signature = (record_ty[:], types.intp, valuetype)
    cfunc = self.get_cfunc(pyfunc, signature)

    for idx in range(self.refsample1d.size):
        expected = self.refsample1d.copy()
        pyfunc(expected, idx, value)

        actual = self.nbsample1d.copy()
        cfunc(actual, idx, value)

        # Match the entire array to ensure no memory corruption
        np.testing.assert_equal(expected, actual)
def parse_dtype(dtype):
    """
    Return the dtype described by a type.

    Handles a DTypeSpec (used for most dtypes), a TypeRef (used for record
    types) and a StringLiteral naming an attribute of the ``np`` module.
    Returns ``None`` for anything else, including a string literal that
    does not name an ``np`` attribute.
    """
    if isinstance(dtype, types.DTypeSpec):
        return dtype.dtype
    if isinstance(dtype, types.TypeRef):
        return dtype.instance_type
    if isinstance(dtype, types.StringLiteral):
        np_attr = getattr(np, dtype.literal_value, None)
        if np_attr is not None:
            return from_dtype(np_attr)
    return None
def test_ufunc(self):
    """
    Vectorize a datetime difference for the ``cuda`` target and compare
    its output with NumPy's own subtraction.
    """
    day_dtype = np.dtype('datetime64[D]')
    datetime_t = from_dtype(day_dtype)

    @vectorize([(datetime_t, datetime_t)], target='cuda')
    def timediff(start, end):
        return end - start

    starts = np.arange('2005-02', '2006-02', dtype='datetime64[D]')
    # Shift every start date by a random, non-negative number of days.
    day_offsets = np.random.randint(0, 10000, starts.size)
    ends = starts + day_offsets

    delta = timediff(starts, ends)
    self.assertPreciseEqual(delta, ends - starts)
def test_record_write_2d_array(self):
    '''
    Write to a 2D array member of a structured type through the compiled
    function and compare with a record filled in directly.
    '''
    nbval = np.recarray(1, dtype=recordwith2darray)
    nbrecord = numpy_support.from_dtype(recordwith2darray)
    self.get_cfunc(record_write_2d_array, (nbrecord,))(nbval[0])

    expected = np.recarray(1, dtype=recordwith2darray)
    expected[0].i = 3
    expected[0].j[:] = np.array(
        [[5.0, 6.0], [7.0, 8.0], [9.0, 10.0]], dtype=np.float32
    )
    np.testing.assert_equal(expected, nbval)
def test_array_1d_record_mutate_npm(self, flags=no_pyobj_flags):
    """
    Run ``record_iter_mutate_usecase`` on copies of the same record array
    with the Python and the compiled function; both copies must end up
    precisely equal.
    """
    pyfunc = record_iter_mutate_usecase
    array_type = types.Array(numpy_support.from_dtype(record_dtype), 1, "A")
    cfunc = compile_isolated(pyfunc, (array_type,), flags=flags).entry_point

    # Fill the three records with index-derived values.
    arr = np.recarray(3, dtype=record_dtype)
    for idx in range(3):
        arr[idx].a = float(idx * 2)
        arr[idx].b = idx + 2

    expected = arr.copy()
    pyfunc(expected)

    got = arr.copy()
    cfunc(got)

    self.assertPreciseEqual(expected, got)
def test_record_read_1d_array(self):
    '''
    Read a 1D array member of a structured type into an output array
    through the compiled function.
    '''
    rec = self.samplerec1darr.copy()
    for pos, val in enumerate((4.0, 5.0)):
        rec['h'][pos] = val

    nbrecord = numpy_support.from_dtype(recordwitharray)
    cfunc = self.get_cfunc(record_read_array, (nbrecord,))

    # The compiled function fills ``out``; it must then match ``rec['h']``.
    out = np.zeros(2, dtype=rec['h'].dtype)
    cfunc(rec, out)
    np.testing.assert_equal(rec['h'], out)
def test_record_read_2d_array(self):
    '''
    Read a 2D array member of a structured type into an output array
    through the compiled function.
    '''
    rec = self.samplerec2darr.copy()
    rec['j'][:] = np.array(
        [[5.0, 6.0], [7.0, 8.0], [9.0, 10.0]], dtype=np.float32
    )

    nbrecord = numpy_support.from_dtype(recordwith2darray)
    cfunc = self.get_cfunc(record_read_2d_array, (nbrecord,))

    # The compiled function fills ``out``; it must then match ``rec['j']``.
    out = np.zeros((3, 2), dtype=rec['j'].dtype)
    cfunc(rec, out)
    np.testing.assert_equal(rec['j'], out)
def test_record_write_array(self):
    '''
    Write to a 1D array member of a structured type through the compiled
    function and compare with a record filled in directly.
    '''
    nbval = np.recarray(1, dtype=recordwitharray)
    nbrecord = numpy_support.from_dtype(recordwitharray)
    self.get_cfunc(record_write_array, (nbrecord,))(nbval[0])

    expected = np.recarray(1, dtype=recordwitharray)
    expected[0].g = 2
    for pos, val in enumerate((3.0, 4.0)):
        expected[0].h[pos] = val

    np.testing.assert_equal(expected, nbval)
def test_record_write_2d_array(self):
    '''
    Write to a 2D array member of a structured type through the compiled
    function, starting from a copy of the sample record.
    '''
    rec = self.samplerec2darr.copy()
    nbrecord = numpy_support.from_dtype(recordwith2darray)
    self.get_cfunc(record_write_2d_array, (nbrecord,))(rec)

    expected = self.samplerec2darr.copy()
    expected['i'] = 3
    expected['j'][:] = np.array(
        [[5.0, 6.0], [7.0, 8.0], [9.0, 10.0]], dtype=np.float32
    )
    np.testing.assert_equal(expected, rec)
def _numba_type_(self):
    """
    Return the ``MultiVectorType`` for this object's layout and value
    dtype, creating and caching it on the layout type on first use.
    """
    layout_type = self.layout._numba_type_
    cache = layout_type._cache
    np_dtype = self.value.dtype

    # The cache is keyed by the raw numpy dtype: computing and hashing the
    # numba dtype is slow, while the numpy dtype is much faster as a key.
    try:
        numba_type = cache[np_dtype]
    except KeyError:
        numba_type = cache[np_dtype] = MultiVectorType(
            layout_type, _numpy_support.from_dtype(np_dtype)
        )
    return numba_type
def test_record_write_1d_array(self):
    '''
    Write to a 1D array member of a structured type through the compiled
    function, starting from a copy of the sample record.
    '''
    rec = self.samplerec1darr.copy()
    nbrecord = numpy_support.from_dtype(recordwitharray)
    self.get_cfunc(record_write_array, (nbrecord,))(rec)

    expected = self.samplerec1darr.copy()
    expected['g'] = 2
    for pos, val in enumerate((3.0, 4.0)):
        expected['h'][pos] = val

    np.testing.assert_equal(expected, rec)
def test_record_read_array(self):
    '''
    Read individual elements of a 1D array member of a structured type
    through the compiled functions.
    '''
    nbval = np.recarray(1, dtype=recordwitharray)
    nbval[0].h[0] = 15.0
    nbval[0].h[1] = 25.0
    nbrecord = numpy_support.from_dtype(recordwitharray)

    # Each use case is compared with the element at the paired position.
    readers = ((record_read_array0, 0), (record_read_array1, 1))
    for pyfunc, pos in readers:
        cfunc = self.get_cfunc(pyfunc, (nbrecord,))
        res = cfunc(nbval[0])
        np.testing.assert_equal(res, nbval[0].h[pos])
def _test_usecase2to5(self, pyfunc, dtype):
    """
    Check that *pyfunc* and its compiled version write the same text to
    stdout when run on the sample array built for *dtype*.
    """
    array = self._setup_usecase2to5(dtype)
    record_type = numpy_support.from_dtype(dtype)
    cfunc = compile_isolated(pyfunc, (record_type[:], types.intp)).entry_point

    def printed_by(func):
        # Run *func* under stdout capture and return the captured
        # ``sys.stdout`` contents.
        with captured_stdout():
            func(array, len(array))
            return sys.stdout.getvalue()

    expect = printed_by(pyfunc)
    got = printed_by(cfunc)
    self.assertEqual(expect, got)
def _get_frame_row_type(dtype):
    """
    Build the numba `Record` type corresponding to a frame row.

    Every column is modelled, together with its validity flag, as a
    MaskedType, and the row as a dictionary-like structure of these
    MaskedTypes. Large parts of this function are copied with comments
    from the Numba internals and slightly modified to account for validity
    bools to be present in the final struct. See
    numba.np.numpy_support.from_struct_dtype for details.
    """
    members = []
    offset = 0
    field_items = list(dtype.fields.items())
    itemsizes = [info[0].itemsize for _, info in field_items]
    last_pos = len(itemsizes) - 1

    for pos, (name, info) in enumerate(field_items):
        # *info* holds the element dtype, its offset from the start of the
        # record, and optionally a "title" carrying metadata. The offset
        # stored there assumes no masking, so it is ignored and the offset
        # is recomputed here for the masked layout.
        elemdtype = info[0]
        title = info[2] if len(info) == 3 else None
        member_type = MaskedType(numpy_support.from_dtype(elemdtype))
        members.append(
            (name, {"type": member_type, "offset": offset, "title": title})
        )

        # Advance past the element plus one byte for validity.
        offset += elemdtype.itemsize + 1

        # Align the next member of the struct to be a multiple of the
        # memory access size, per PTX ISA 7.4/5.4.5
        if pos < last_pos:
            next_itemsize = itemsizes[pos + 1]
            offset = int(math.ceil(offset / next_itemsize) * next_itemsize)

    # Numba requires that structures are aligned for the CUDA target
    aligned = True
    return Record(members, offset, aligned)
def _test_record_args(self, revargs):
    """
    Test passing a scalar record value as an argument.

    For each of the attributes ``a``, ``b`` and ``c``, a use case function
    is looked up by name in the module globals, compiled, and called with
    a record scalar and a value. When *revargs* is true the
    ``get_record_rev_*`` variants are used and the record is passed as the
    second argument; otherwise the ``get_record_*`` variants are used and
    the record is passed first. The reference count of the record scalar
    is checked against its initial value at the end.
    """
    # Scalar records taken from copies of the sample arrays.
    npval = self.refsample1d.copy()[0]
    nbval = self.nbsample1d.copy()[0]
    attrs = "abc"
    valtypes = types.float64, types.int16, types.complex64
    values = 1.23, 12345, 123 + 456j
    # Baseline reference count, compared again at the end of the test.
    old_refcnt = sys.getrefcount(nbval)

    for attr, valtyp, val in zip(attrs, valtypes, values):
        # Expected return value: the attribute as read from the reference
        # (NumPy-side) record.
        expected = getattr(npval, attr)
        nbrecord = numpy_support.from_dtype(recordtype)

        # Test with a record as either the first argument or the second
        # argument (issue #870)
        if revargs:
            prefix = "get_record_rev_"
            argtypes = (valtyp, nbrecord)
            args = (val, nbval)
        else:
            prefix = "get_record_"
            argtypes = (nbrecord, valtyp)
            args = (nbval, val)

        # The use case function is resolved by name, e.g. prefix + "a".
        pyfunc = globals()[prefix + attr]
        cfunc = self.get_cfunc(pyfunc, argtypes)

        got = cfunc(*args)
        try:
            self.assertEqual(expected, got)
        except AssertionError:
            # On ARM, a LLVM misoptimization can produce buggy code,
            # see https://llvm.org/bugs/show_bug.cgi?id=24669
            import llvmlite.binding as ll
            # The failure is only tolerated for attribute "c" on the
            # armv7l triple below; anything else is re-raised.
            if attr != "c":
                raise
            if ll.get_default_triple() != "armv7l-unknown-linux-gnueabihf":
                raise
            self.assertEqual(val, got)
        else:
            # After a successful call the record attribute must hold *val*.
            self.assertEqual(nbval[attr], val)
        # Drop the locals of this iteration (``args`` holds ``nbval``)
        # before the reference count is checked.
        del got, expected, args

    # Check for potential leaks (issue #441)
    self.assertEqual(sys.getrefcount(nbval), old_refcnt)
def _infer_index_type(index):
    '''
    Convert an input index into the Numba type used to represent it.

    Returns ``types.none`` for ``None``, for instances of
    ``types.NoneType``, ``pd.RangeIndex`` or ``pd.DatetimeIndex``, and for
    empty indexes. Returns ``string_array_type`` for an object-dtype index
    whose first element is a ``str``. Otherwise returns a 1-D C-layout
    array type whose element type is derived from ``index.dtype``.
    '''
    no_index_types = (types.NoneType, pd.RangeIndex, pd.DatetimeIndex)
    if isinstance(index, no_index_types) or index is None:
        return types.none
    if len(index) == 0:
        return types.none

    # The index is known to be non-empty here, so its first element can be
    # inspected directly.
    if index.dtype == np.dtype('O') and isinstance(index[0], str):
        return string_array_type

    element_type = numpy_support.from_dtype(index.dtype)
    return types.Array(element_type, 1, 'C')
def compile_masked_udf(func, dtypes):
    """
    Generate an inlineable PTX function that will be injected into
    a variadic kernel inside libcudf.

    Every input is assumed to be of type `MaskedType(input_col.dtype)`;
    the requested function is compiled to PTX over those types.

    Returns the numpy dtype of the output value type and the PTX code.
    """
    masked_arg_types = tuple(
        MaskedType(numpy_support.from_dtype(np_type)) for np_type in dtypes
    )

    # Get the inlineable PTX function
    ptx, numba_output_type = cudautils.compile_udf(func, masked_arg_types)
    numpy_output_type = numpy_support.as_dtype(numba_output_type.value_type)

    return numpy_output_type, ptx
def parse_dtype(dtype):
    """
    Return the dtype described by a type.

    Handles a DTypeSpec (used for most dtypes), a TypeRef (used for record
    types) and a StringLiteral holding a NumPy dtype string. Returns
    ``None`` for any other kind of type.

    Raises TypingError when ``np.dtype`` rejects the string with a
    ``TypeError``.
    """
    if isinstance(dtype, types.DTypeSpec):
        return dtype.dtype
    if isinstance(dtype, types.TypeRef):
        return dtype.instance_type
    if not isinstance(dtype, types.StringLiteral):
        return None

    dtstr = dtype.literal_value
    try:
        np_dtype = np.dtype(dtstr)
    except TypeError:
        raise TypingError(f"Invalid NumPy dtype specified: '{dtstr}'")
    return from_dtype(np_dtype)
def test_set_record(self):
    """
    Apply ``set_record`` and its compiled version to copies of the sample
    array for several index pairs and compare the results.
    """
    pyfunc = set_record
    record_ty = numpy_support.from_dtype(recordtype)
    cfunc = self.get_cfunc(pyfunc, (record_ty[:], types.intp, types.intp))

    for first, second in ((0, 1), (1, 2), (0, 2)):
        expected = self.sample1d.copy()
        pyfunc(expected, first, second)

        actual = self.sample1d.copy()
        cfunc(actual, first, second)

        # Match the entire array to ensure no memory corruption
        self.assertEqual(expected[first], expected[second])
        self.assertEqual(actual[first], actual[second])
        self.assertTrue(np.all(expected == actual))